int Drumm2(const Epetra_Map& map, bool verbose)
{
  //Simple 2-element problem (element as in "finite-element") from
  //Clif Drumm. Two triangular elements, one per processor, as shown
  //here:
  //
  //   *----*
  //  3|\  2|
  //   | \  |
  //   | 0\1|
  //   |   \|
  //   *----*
  //  0    1
  //
  //Element 0 on processor 0, element 1 on processor 1.
  //Processor 0 will own nodes 0,1,3 and processor 1 will own node 2.
  //Each processor will pass a 3x3 element-matrix to Epetra_FECrsMatrix.
  //After GlobalAssemble(), the matrix should be as follows:
  //
  //         row 0: 2  1  0  1
  //proc 0   row 1: 1  4  1  2
  //         row 2: 0  1  2  1
  //----------------------------------
  //proc 1   row 3: 1  2  1  4
  //

  int numProcs = map.Comm().NumProc();
  int localProc = map.Comm().MyPID();

  if (numProcs != 2) return(0);

  int indexBase = 0, ierr = 0;

  double* values = new double[9];
  values[0] = 2.0;
  values[1] = 1.0;
  values[2] = 1.0;
  values[3] = 1.0;
  values[4] = 2.0;
  values[5] = 1.0;
  values[6] = 1.0;
  values[7] = 1.0;
  values[8] = 2.0;

  if (localProc == 0) {
    int numMyNodes = 3;
    int* myNodes = new int[numMyNodes];
    myNodes[0] = 0;
    myNodes[1] = 1;
    myNodes[2] = 3;

    Epetra_Map Map(-1, numMyNodes, myNodes, indexBase, map.Comm());

    int rowLengths = 3;
    Epetra_FECrsMatrix A(Copy, Map, rowLengths);

    EPETRA_TEST_ERR( A.InsertGlobalValues(numMyNodes, myNodes,
                                          numMyNodes, myNodes,
                                          values,
                                          Epetra_FECrsMatrix::ROW_MAJOR),ierr);

    EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );
    EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );

    if (verbose) {
      A.Print(cout);
    }

    //now let's make sure we can do a matvec with this matrix.
    Epetra_Vector x(Map), y(Map);
    x.PutScalar(1.0);
    EPETRA_TEST_ERR( A.Multiply(false, x, y), ierr);

    if (verbose&&localProc==0) {
      cout << "y = A*x, x=1.0's"<<endl;
    }

    if (verbose) {
      y.Print(cout);
    }

    delete [] myNodes;
    delete [] values;
  }
  else {
    int numMyNodes = 1;
    int* myNodes = new int[numMyNodes];
    myNodes[0] = 2;

    Epetra_Map Map(-1, numMyNodes, myNodes, indexBase, map.Comm());

    int rowLengths = 3;
    Epetra_FECrsMatrix A(Copy, Map, rowLengths);

    delete [] myNodes;
    numMyNodes = 3;
    myNodes = new int[numMyNodes];
    myNodes[0] = 1;
    myNodes[1] = 2;
    myNodes[2] = 3;

    EPETRA_TEST_ERR( A.InsertGlobalValues(numMyNodes, myNodes,
                                          numMyNodes, myNodes,
                                          values,
                                          Epetra_FECrsMatrix::ROW_MAJOR),ierr);

    EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );
    EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );

    if (verbose) {
      A.Print(cout);
    }

    //now let's make sure we can do a matvec with this matrix.
    Epetra_Vector x(Map), y(Map);
    x.PutScalar(1.0);
    EPETRA_TEST_ERR( A.Multiply(false, x, y), ierr);

    if (verbose) {
      y.Print(cout);
    }

    delete [] myNodes;
    delete [] values;
  }

  return(0);
}
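// A minimal driver sketch for Drumm2 (an illustration, not the test's actual
// main; "run_drumm2" is our name). The incoming map is used only for its
// communicator, so any small map works, and Drumm2 itself returns early
// unless exactly two processes are running.
#ifdef EPETRA_MPI
#include <mpi.h>
#include "Epetra_MpiComm.h"
#else
#include "Epetra_SerialComm.h"
#endif
#include "Epetra_Map.h"

int run_drumm2(int argc, char** argv)
{
#ifdef EPETRA_MPI
  MPI_Init(&argc, &argv);
  Epetra_MpiComm comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm comm;
#endif

  Epetra_Map map(4, 0, comm);           // 4 global elements, index base 0
  int err = Drumm2(map, true /*verbose*/);

#ifdef EPETRA_MPI
  MPI_Finalize();
#endif
  return err;
}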
//============================================================================= Epetra_Map * Epetra_Map::RemoveEmptyProcesses() const { #ifdef HAVE_MPI const Epetra_MpiComm * MpiComm = dynamic_cast<const Epetra_MpiComm*>(&Comm()); // If the Comm isn't MPI, just treat this as a copy constructor if(!MpiComm) return new Epetra_Map(*this); MPI_Comm NewComm,MyMPIComm = MpiComm->Comm(); // Create the new communicator. MPI_Comm_split returns a valid // communicator on all processes. On processes where color == MPI_UNDEFINED, // ignore the result. Passing key == 0 tells MPI to order the // processes in the new communicator by their rank in the old // communicator. const int color = (NumMyElements() == 0) ? MPI_UNDEFINED : 1; // MPI_Comm_split must be called collectively over the original // communicator. We can't just call it on processes with color // one, even though we will ignore its result on processes with // color zero. int rv = MPI_Comm_split(MyMPIComm,color,0,&NewComm); if(rv!=MPI_SUCCESS) throw ReportError("Epetra_Map::RemoveEmptyProcesses: MPI_Comm_split failed.",-1); if(color == MPI_UNDEFINED) return 0; // We're not in the new map else { Epetra_MpiComm * NewEpetraComm = new Epetra_MpiComm(NewComm); // Use the copy constructor for a new map, but basically because it does nothing useful Epetra_Map * NewMap = new Epetra_Map(*this); // Get rid of the old BlockMapData, now make a new one from scratch... NewMap->CleanupData(); if(GlobalIndicesInt()) { #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES NewMap->BlockMapData_ = new Epetra_BlockMapData(NumGlobalElements(),0,IndexBase(),*NewEpetraComm,false); #endif } else { #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES NewMap->BlockMapData_ = new Epetra_BlockMapData(NumGlobalElements64(),0,IndexBase64(),*NewEpetraComm,true); #endif } // Now copy all of the relevent bits of BlockMapData... 
// NewMap->BlockMapData_->Comm_ = NewEpetraComm; NewMap->BlockMapData_->LID_ = BlockMapData_->LID_; #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES NewMap->BlockMapData_->MyGlobalElements_int_ = BlockMapData_->MyGlobalElements_int_; #endif #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES NewMap->BlockMapData_->MyGlobalElements_LL_ = BlockMapData_->MyGlobalElements_LL_; #endif NewMap->BlockMapData_->FirstPointInElementList_ = BlockMapData_->FirstPointInElementList_; NewMap->BlockMapData_->ElementSizeList_ = BlockMapData_->ElementSizeList_; NewMap->BlockMapData_->PointToElementList_ = BlockMapData_->PointToElementList_; NewMap->BlockMapData_->NumGlobalElements_ = BlockMapData_->NumGlobalElements_; NewMap->BlockMapData_->NumMyElements_ = BlockMapData_->NumMyElements_; NewMap->BlockMapData_->IndexBase_ = BlockMapData_->IndexBase_; NewMap->BlockMapData_->ElementSize_ = BlockMapData_->ElementSize_; NewMap->BlockMapData_->MinMyElementSize_ = BlockMapData_->MinMyElementSize_; NewMap->BlockMapData_->MaxMyElementSize_ = BlockMapData_->MaxMyElementSize_; NewMap->BlockMapData_->MinElementSize_ = BlockMapData_->MinElementSize_; NewMap->BlockMapData_->MaxElementSize_ = BlockMapData_->MaxElementSize_; NewMap->BlockMapData_->MinAllGID_ = BlockMapData_->MinAllGID_; NewMap->BlockMapData_->MaxAllGID_ = BlockMapData_->MaxAllGID_; NewMap->BlockMapData_->MinMyGID_ = BlockMapData_->MinMyGID_; NewMap->BlockMapData_->MaxMyGID_ = BlockMapData_->MaxMyGID_; NewMap->BlockMapData_->MinLID_ = BlockMapData_->MinLID_; NewMap->BlockMapData_->MaxLID_ = BlockMapData_->MaxLID_; NewMap->BlockMapData_->NumGlobalPoints_ = BlockMapData_->NumGlobalPoints_; NewMap->BlockMapData_->NumMyPoints_ = BlockMapData_->NumMyPoints_; NewMap->BlockMapData_->ConstantElementSize_ = BlockMapData_->ConstantElementSize_; NewMap->BlockMapData_->LinearMap_ = BlockMapData_->LinearMap_; NewMap->BlockMapData_->DistributedGlobal_ = NewEpetraComm->NumProc()==1 ? false : BlockMapData_->DistributedGlobal_; NewMap->BlockMapData_->OneToOneIsDetermined_ = BlockMapData_->OneToOneIsDetermined_; NewMap->BlockMapData_->OneToOne_ = BlockMapData_->OneToOne_; NewMap->BlockMapData_->GlobalIndicesInt_ = BlockMapData_->GlobalIndicesInt_; NewMap->BlockMapData_->GlobalIndicesLongLong_ = BlockMapData_->GlobalIndicesLongLong_; NewMap->BlockMapData_->LastContiguousGID_ = BlockMapData_->LastContiguousGID_; NewMap->BlockMapData_->LastContiguousGIDLoc_ = BlockMapData_->LastContiguousGIDLoc_; NewMap->BlockMapData_->LIDHash_ = BlockMapData_->LIDHash_ ? new Epetra_HashTable<int>(*BlockMapData_->LIDHash_) : 0; // Delay directory construction NewMap->BlockMapData_->Directory_ = 0; // Cleanup delete NewEpetraComm; return NewMap; } #else // MPI isn't compiled, so just treat this as a copy constructor return new Epetra_Map(*this); #endif }
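// Usage sketch for RemoveEmptyProcesses (our illustration; the ownership
// rules follow from the code above): the caller owns the returned map, and
// ranks that held no elements get a null pointer back, since they are left
// out of the split communicator, so they must not dereference the result.
#include "Epetra_Map.h"

void useTrimmedMap(const Epetra_Map& originalMap)
{
  Epetra_Map* trimmed = originalMap.RemoveEmptyProcesses();
  if (trimmed != 0) {
    // This rank kept elements and belongs to the reduced communicator;
    // build reduced-communicator objects from *trimmed here.
    delete trimmed;   // caller owns the result
  }
  // Ranks with zero local elements received 0 and skip the reduced phase.
}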
int main(int argc, char *argv[]) { using Teuchos::RCP; // reference count pointers using Teuchos::rcp; // // // MPI initialization using Teuchos // #ifdef HAVE_MPI MPI_Init(&argc, &argv); Epetra_MpiComm comm(MPI_COMM_WORLD); #else Epetra_SerialComm comm; #endif // // Parameters // GlobalOrdinal numGlobalElements = 256; // problem size // // Construct the problem // // Construct a Map that puts approximately the same number of equations on each processor const Epetra_Map map(numGlobalElements, 0, comm); // Get update list and number of local equations from newly created map. const size_t numMyElements = map.NumMyElements(); const GlobalOrdinal* myGlobalElements = map.MyGlobalElements(); // Create a CrsMatrix using the map, with a dynamic allocation of 3 entries per row RCP<Epetra_CrsMatrix> A = rcp(new Epetra_CrsMatrix(Copy, map, 3)); // Add rows one-at-a-time for (size_t i = 0; i < numMyElements; i++) { if (myGlobalElements[i] == 0) { //TODO: should be rewritten in an Epetra style A->InsertGlobalValues(myGlobalElements[i], 2, Teuchos::tuple<Scalar> (2.0, -1.0).getRawPtr(), Teuchos::tuple<GlobalOrdinal>(myGlobalElements[i], myGlobalElements[i] +1).getRawPtr()); } else if (myGlobalElements[i] == numGlobalElements - 1) { A->InsertGlobalValues(myGlobalElements[i], 2, Teuchos::tuple<Scalar> (-1.0, 2.0).getRawPtr(), Teuchos::tuple<GlobalOrdinal>(myGlobalElements[i] -1, myGlobalElements[i]).getRawPtr()); } else { A->InsertGlobalValues(myGlobalElements[i], 3, Teuchos::tuple<Scalar> (-1.0, 2.0, -1.0).getRawPtr(), Teuchos::tuple<GlobalOrdinal>(myGlobalElements[i] -1, myGlobalElements[i], myGlobalElements[i] +1).getRawPtr()); } } // Complete the fill, ask that storage be reallocated and optimized A->FillComplete(); // // Construct a multigrid preconditioner // // Turns a Epetra_CrsMatrix into a MueLu::Matrix RCP<Xpetra::CrsMatrix<SC, LO, GO, NO, LMO> > mueluA_ = rcp(new Xpetra::EpetraCrsMatrix(A)); //TODO: should not be needed RCP<Xpetra::Matrix <SC, LO, GO, NO, LMO> > mueluA = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO, LMO>(mueluA_)); // Multigrid Hierarchy RCP<Hierarchy> H = rcp(new Hierarchy(mueluA)); H->setVerbLevel(Teuchos::VERB_HIGH); // Multigrid setup phase (using default parameters) H->Setup(); // // Define RHS / LHS // RCP<Epetra_Vector> X = rcp(new Epetra_Vector(map)); RCP<Epetra_Vector> B = rcp(new Epetra_Vector(map)); X->PutScalar((Scalar) 0.0); B->SetSeed(846930886); B->Random(); #ifndef HAVE_MUELU_BELOS // // Use AMG directly as an iterative solver (not as a preconditionner) // int nIts = 9; // Wrap Epetra Vectors into Xpetra Vectors RCP<Vector> mueluX = rcp(new Xpetra::EpetraVector(X)); RCP<Vector> mueluB = rcp(new Xpetra::EpetraVector(B)); H->Iterate(*mueluB, nIts, *mueluX); // Print relative residual norm ST::magnitudeType residualNorms = Utils::ResidualNorm(*mueluA, *mueluX, *mueluB)[0]; if (comm.MyPID() == 0) std::cout << "||Residual|| = " << residualNorms << std::endl; #else // HAVE_MUELU_BELOS // // Solve Ax = b using AMG as a preconditioner in Belos // // Matrix and Multivector type that will be used with Belos typedef Epetra_MultiVector MV; typedef Belos::OperatorT<MV> OP; // Define Operator and Preconditioner RCP<OP> belosOp = rcp(new Belos::XpetraOp<SC, LO, GO, NO, LMO>(mueluA)); // Turns a Xpetra::Matrix object into a Belos operator RCP<OP> belosPrec = rcp(new Belos::MueLuOp<SC, LO, GO, NO, LMO>(H)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object RCP< Belos::LinearProblem<SC, MV, OP> > belosProblem = rcp(new 
Belos::LinearProblem<SC, MV, OP>(belosOp, X, B)); belosProblem->setLeftPrec(belosPrec); bool set = belosProblem->setProblem(); if (set == false) { std::cout << std::endl << "ERROR: Belos::LinearProblem failed to set up correctly!" << std::endl; return EXIT_FAILURE; } // Belos parameter list int maxIts = 20; double tol = 1e-4; Teuchos::ParameterList belosList; belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails); // Create an iterative solver manager RCP< Belos::SolverManager<SC, MV, OP> > solver = rcp(new Belos::BlockCGSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false))); // Perform solve Belos::ReturnType ret = solver->solve(); // Get the number of iterations for this solve. std::cout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; // Compute actual residuals. int numrhs=1; bool badRes = false; std::vector<SC> actual_resids(numrhs); std::vector<SC> rhs_norm(numrhs); RCP<Epetra_MultiVector > resid = rcp(new Epetra_MultiVector(map, numrhs)); typedef Belos::OperatorTraits<SC, MV, OP> OPT; typedef Belos::MultiVecTraits<SC, MV> MVT; OPT::Apply(*belosOp, *X, *resid); MVT::MvAddMv(-1.0, *resid, 1.0, *B, *resid); MVT::MvNorm(*resid, actual_resids); MVT::MvNorm(*B, rhs_norm); std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl; for (int i = 0; i < numrhs; i++) { SC actRes = actual_resids[i]/rhs_norm[i]; std::cout <<"Problem " << i << " : \t" << actRes <<std::endl; if (actRes > tol) { badRes = true; } } // Check convergence if (ret != Belos::Converged || badRes) { std::cout << std::endl << "ERROR: Belos did not converge! " << std::endl; return EXIT_FAILURE; } std::cout << std::endl << "SUCCESS: Belos converged!" << std::endl; #endif // HAVE_MUELU_BELOS #ifdef HAVE_MPI MPI_Finalize(); #endif return EXIT_SUCCESS; }
Epetra_CrsGraph * BlockUtility::TGenerateBlockGraph( const Epetra_CrsGraph & BaseGraph, const Epetra_CrsGraph & LocalBlockGraph, const Epetra_Comm & GlobalComm ) { const Epetra_BlockMap & BaseRowMap = BaseGraph.RowMap(); const Epetra_BlockMap & BaseColMap = BaseGraph.ColMap(); int_type ROffset = BlockUtility::TCalculateOffset<int_type>(BaseRowMap); (void) ROffset; // Silence "unused variable" compiler warning. int_type COffset = BlockUtility::TCalculateOffset<int_type>(BaseColMap); //Get Base Global IDs const Epetra_BlockMap & BlockRowMap = LocalBlockGraph.RowMap(); const Epetra_BlockMap & BlockColMap = LocalBlockGraph.ColMap(); int NumBlockRows = BlockRowMap.NumMyElements(); vector<int_type> RowIndices(NumBlockRows); BlockRowMap.MyGlobalElements(&RowIndices[0]); int Size = BaseRowMap.NumMyElements(); Epetra_Map *GlobalRowMap = GenerateBlockMap(BaseRowMap, BlockRowMap, GlobalComm); int MaxIndices = BaseGraph.MaxNumIndices(); vector<int_type> Indices(MaxIndices); Epetra_CrsGraph * GlobalGraph = new Epetra_CrsGraph( Copy, dynamic_cast<Epetra_BlockMap&>(*GlobalRowMap), 0 ); int NumBlockIndices, NumBaseIndices; int *BlockIndices, *BaseIndices; for( int i = 0; i < NumBlockRows; ++i ) { LocalBlockGraph.ExtractMyRowView(i, NumBlockIndices, BlockIndices); for( int j = 0; j < Size; ++j ) { int_type GlobalRow = (int_type) GlobalRowMap->GID64(j+i*Size); BaseGraph.ExtractMyRowView( j, NumBaseIndices, BaseIndices ); for( int k = 0; k < NumBlockIndices; ++k ) { int_type ColOffset = (int_type) BlockColMap.GID64(BlockIndices[k]) * COffset; for( int l = 0; l < NumBaseIndices; ++l ) Indices[l] = (int_type) BaseGraph.GCID64(BaseIndices[l]) + ColOffset; GlobalGraph->InsertGlobalIndices( GlobalRow, NumBaseIndices, &Indices[0] ); } } } const Epetra_BlockMap & BaseDomainMap = BaseGraph.DomainMap(); const Epetra_BlockMap & BaseRangeMap = BaseGraph.RangeMap(); const Epetra_BlockMap & BlockDomainMap = LocalBlockGraph.DomainMap(); const Epetra_BlockMap & BlockRangeMap = LocalBlockGraph.RangeMap(); Epetra_Map *GlobalDomainMap = GenerateBlockMap(BaseDomainMap, BlockDomainMap, GlobalComm); Epetra_Map *GlobalRangeMap = GenerateBlockMap(BaseRangeMap, BlockRangeMap, GlobalComm); GlobalGraph->FillComplete(*GlobalDomainMap, *GlobalRangeMap); delete GlobalDomainMap; delete GlobalRangeMap; delete GlobalRowMap; return GlobalGraph; }
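// The graph above packs (block, base) index pairs into a single global ID:
// global = baseGID + blockGID * offset, with offset larger than any base GID
// (presumably what TCalculateOffset computes from the base map). A small
// standalone illustration of that arithmetic, with names of our own:
#include <cassert>

inline long long packGID(long long baseGID, long long blockID, long long offset)
{ return baseGID + blockID * offset; }

inline long long baseOf (long long global, long long offset) { return global % offset; }
inline long long blockOf(long long global, long long offset) { return global / offset; }

void blockGidDemo()
{
  const long long offset = 100;           // must exceed the largest base GID
  long long g = packGID(42, 3, offset);   // base row 42 inside block row 3
  assert(g == 342);
  assert(baseOf(g, offset) == 42 && blockOf(g, offset) == 3);
}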
// FIXME long long Epetra_Map Epetra_Util::Create_Root_Map(const Epetra_Map& usermap, int root) { int numProc = usermap.Comm().NumProc(); if (numProc==1) { Epetra_Map newmap(usermap); return(newmap); } const Epetra_Comm & comm = usermap.Comm(); bool isRoot = usermap.Comm().MyPID()==root; //if usermap is already completely owned by root then we'll just return a copy of it. int quickreturn = 0; int globalquickreturn = 0; if (isRoot) { if (usermap.NumMyElements()==usermap.NumGlobalElements64()) quickreturn = 1; } else { if (usermap.NumMyElements()==0) quickreturn = 1; } usermap.Comm().MinAll(&quickreturn, &globalquickreturn, 1); if (globalquickreturn==1) { Epetra_Map newmap(usermap); return(newmap); } // Linear map: Simple case, just put all GIDs linearly on root processor if (usermap.LinearMap() && root!=-1) { int numMyElements = 0; if (isRoot) numMyElements = usermap.MaxAllGID64()+1; // FIXME long long Epetra_Map newmap(-1, numMyElements, usermap.IndexBase(), comm); return(newmap); } if (!usermap.UniqueGIDs()) throw usermap.ReportError("usermap must have unique GIDs",-1); // General map // Build IntVector of the GIDs, then ship them to root processor int numMyElements = usermap.NumMyElements(); Epetra_Map allGidsMap(-1, numMyElements, 0, comm); Epetra_IntVector allGids(allGidsMap); for (int i=0; i<numMyElements; i++) allGids[i] = usermap.GID64(i); int numGlobalElements = usermap.NumGlobalElements64(); if (root!=-1) { int n1 = 0; if (isRoot) n1 = numGlobalElements; Epetra_Map allGidsOnRootMap(-1, n1, 0, comm); Epetra_Import importer(allGidsOnRootMap, allGidsMap); Epetra_IntVector allGidsOnRoot(allGidsOnRootMap); allGidsOnRoot.Import(allGids, importer, Insert); Epetra_Map rootMap(-1, allGidsOnRoot.MyLength(), allGidsOnRoot.Values(), usermap.IndexBase(), comm); return(rootMap); } else { int n1 = numGlobalElements; Epetra_LocalMap allGidsOnRootMap(n1, 0, comm); Epetra_Import importer(allGidsOnRootMap, allGidsMap); Epetra_IntVector allGidsOnRoot(allGidsOnRootMap); allGidsOnRoot.Import(allGids, importer, Insert); Epetra_Map rootMap(-1, allGidsOnRoot.MyLength(), allGidsOnRoot.Values(), usermap.IndexBase(), comm); return(rootMap); } }
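// Usage sketch (a sketch under the assumption that Create_Root_Map is
// callable as shown; see Epetra_Util.h for the exact declaration): gather
// every GID of a distributed map onto rank 0, leaving the other ranks with
// an empty portion, or pass root = -1 to replicate the GIDs on every
// process via the Epetra_LocalMap branch above.
#include "Epetra_Map.h"
#include "Epetra_Util.h"

Epetra_Map gatherToRoot(const Epetra_Map& distributedMap)
{
  Epetra_Util util;
  return util.Create_Root_Map(distributedMap, 0);
}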
int TLowCommunicationMakeColMapAndReindex(int N, const int * rowptr, int * colind_LID, const int_type *colind_GID, const Epetra_Map& domainMap, const int * owningPIDs, bool SortGhostsAssociatedWithEachProcessor, std::vector<int>& RemotePIDs, MapType1 & NewColMap) { int i,j; // Sanity checks bool UseLL; if(domainMap.GlobalIndicesLongLong()) UseLL=true; else if(domainMap.GlobalIndicesInt()) UseLL=false; else throw std::runtime_error("LowCommunicationMakeColMapAndReindex: cannot detect int type."); // Scan all column indices and sort into two groups: // Local: those whose GID matches a GID of the domain map on this processor and // Remote: All others. int numDomainElements = domainMap.NumMyElements(); bool * LocalGIDs = 0; if (numDomainElements>0) LocalGIDs = new bool[numDomainElements]; for (i=0; i<numDomainElements; i++) LocalGIDs[i] = false; // Assume domain GIDs are not local bool DoSizes = !domainMap.ConstantElementSize(); // If not constant element size, then error if(DoSizes) throw std::runtime_error("LowCommunicationMakeColMapAndReindex: cannot handle non-constant sized domainMap."); // In principle it is good to have RemoteGIDs and RemotGIDList be as long as the number of remote GIDs // on this processor, but this would require two passes through the column IDs, so we make it the max of 100 // and the number of block rows. const int numMyBlockRows = N; int hashsize = numMyBlockRows; if (hashsize < 100) hashsize = 100; Epetra_HashTable<int_type> RemoteGIDs(hashsize); std::vector<int_type> RemoteGIDList; RemoteGIDList.reserve(hashsize); std::vector<int> PIDList; PIDList.reserve(hashsize); // Here we start using the *int* colind array. If int_type==int this clobbers the GIDs, if // int_type==long long, then this is the first use of the colind array. // For *local* GID's set colind with with their LID in the domainMap. For *remote* GIDs, // we set colind with (numDomainElements+NumRemoteColGIDs) before the increment of // the remote count. These numberings will be separate because no local LID is greater // than numDomainElements. int NumLocalColGIDs = 0; int NumRemoteColGIDs = 0; for(i = 0; i < numMyBlockRows; i++) { for(j = rowptr[i]; j < rowptr[i+1]; j++) { int_type GID = colind_GID[j]; // Check if GID matches a row GID int LID = domainMap.LID(GID); if(LID != -1) { bool alreadyFound = LocalGIDs[LID]; if (!alreadyFound) { LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID NumLocalColGIDs++; } colind_LID[j] = LID; } else { int_type hash_value=RemoteGIDs.Get(GID); if(hash_value == -1) { // This means its a new remote GID int PID = owningPIDs[j]; if(PID==-1) throw std::runtime_error("LowCommunicationMakeColMapAndReindex: Cannot figure out if PID is owned."); colind_LID[j] = numDomainElements + NumRemoteColGIDs; RemoteGIDs.Add(GID, NumRemoteColGIDs); RemoteGIDList.push_back(GID); PIDList.push_back(PID); NumRemoteColGIDs++; } else colind_LID[j] = numDomainElements + hash_value; } } } // Possible short-circuit: If all domain map GIDs are present as column indices, then set ColMap=domainMap and quit if (domainMap.Comm().NumProc()==1) { if (NumRemoteColGIDs!=0) { throw std::runtime_error("Some column IDs are not in domainMap. If matrix is rectangular, you must pass in a domainMap"); // Sanity test: When one processor,there can be no remoteGIDs } if (NumLocalColGIDs==numDomainElements) { if (LocalGIDs!=0) delete [] LocalGIDs; // In this case, we just use the domainMap's indices, which is, not coincidently, what we clobbered colind with up above anyway. 
// No further reindexing is needed. NewColMap = domainMap; return 0; } } // Now build integer array containing column GIDs // Build back end, containing remote GIDs, first int numMyBlockCols = NumLocalColGIDs + NumRemoteColGIDs; std::vector<int_type> ColIndices; int_type * RemoteColIndices=0; if(numMyBlockCols > 0) { ColIndices.resize(numMyBlockCols); if(NumLocalColGIDs!=numMyBlockCols) RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back end of ColIndices else RemoteColIndices=0; } for(i = 0; i < NumRemoteColGIDs; i++) RemoteColIndices[i] = RemoteGIDList[i]; // Build permute array for *remote* reindexing. std::vector<int> RemotePermuteIDs(NumRemoteColGIDs); for(i=0; i<NumRemoteColGIDs; i++) RemotePermuteIDs[i]=i; // Sort External column indices so that all columns coming from a given remote processor are contiguous int NumListsInt=0; int NumListsLL =0; int * IntSortLists[2]; long long * LLSortLists[2]; int * RemotePermuteIDs_ptr = RemotePermuteIDs.size() ? &RemotePermuteIDs[0] : 0; if(!UseLL) { // int version IntSortLists[0] = (int*) RemoteColIndices; IntSortLists[1] = RemotePermuteIDs_ptr; NumListsInt=2; } else { //LL version LLSortLists[0] = (long long*) RemoteColIndices; IntSortLists[0] = RemotePermuteIDs_ptr; NumListsInt = NumListsLL = 1; } int * PIDList_ptr = PIDList.size() ? &PIDList[0] : 0; Epetra_Util::Sort(true, NumRemoteColGIDs, PIDList_ptr, 0, 0, NumListsInt, IntSortLists,NumListsLL,LLSortLists); // Stash the RemotePIDs PIDList.resize(NumRemoteColGIDs); RemotePIDs = PIDList; if (SortGhostsAssociatedWithEachProcessor) { // Sort external column indices so that columns from a given remote processor are not only contiguous // but also in ascending order. NOTE: I don't know if the number of externals associated // with a given remote processor is known at this point ... so I count them here. // NTS: Only sort the RemoteColIndices this time... int StartCurrent, StartNext; StartCurrent = 0; StartNext = 1; while ( StartNext < NumRemoteColGIDs ) { if (PIDList[StartNext]==PIDList[StartNext-1]) StartNext++; else { IntSortLists[0] = &RemotePermuteIDs[StartCurrent]; Epetra_Util::Sort(true,StartNext-StartCurrent, &(RemoteColIndices[StartCurrent]),0,0,1,IntSortLists,0,0); StartCurrent = StartNext; StartNext++; } } IntSortLists[0] = &RemotePermuteIDs[StartCurrent]; Epetra_Util::Sort(true, StartNext-StartCurrent, &(RemoteColIndices[StartCurrent]), 0, 0, 1,IntSortLists,0,0); } // Reverse the permutation to get the information we actually care about std::vector<int> ReverseRemotePermuteIDs(NumRemoteColGIDs); for(i=0; i<NumRemoteColGIDs; i++) ReverseRemotePermuteIDs[RemotePermuteIDs[i]]=i; // Build permute array for *local* reindexing. bool use_local_permute=false; std::vector<int> LocalPermuteIDs(numDomainElements); // Now fill front end. Two cases: // (1) If the number of Local column GIDs is the same as the number of Local domain GIDs, we // can simply read the domain GIDs into the front part of ColIndices, otherwise // (2) We step through the GIDs of the domainMap, checking to see if each domain GID is a column GID. // we want to do this to maintain a consistent ordering of GIDs between the columns and the domain. 
if(NumLocalColGIDs == domainMap.NumMyElements()) { if(NumLocalColGIDs > 0) { domainMap.MyGlobalElements(&ColIndices[0]); // Load Global Indices into first numMyBlockCols elements column GID list } } else { int_type* MyGlobalElements = 0; domainMap.MyGlobalElementsPtr(MyGlobalElements); int* ElementSizeList = 0; if(DoSizes) ElementSizeList = domainMap.ElementSizeList(); int NumLocalAgain = 0; use_local_permute = true; for(i = 0; i < numDomainElements; i++) { if(LocalGIDs[i]) { LocalPermuteIDs[i] = NumLocalAgain; ColIndices[NumLocalAgain++] = MyGlobalElements[i]; } } assert(NumLocalAgain==NumLocalColGIDs); // Sanity test } // Done with this array if (LocalGIDs!=0) delete [] LocalGIDs; // Make Column map with same element sizes as Domain map int_type * ColIndices_ptr = ColIndices.size() ? &ColIndices[0] : 0; MapType2 temp((int_type)(-1), numMyBlockCols, ColIndices_ptr, (int_type)domainMap.IndexBase64(), domainMap.Comm()); NewColMap = temp; // Low-cost reindex of the matrix for(i=0; i<numMyBlockRows; i++){ for(j=rowptr[i]; j<rowptr[i+1]; j++){ int ID=colind_LID[j]; if(ID < numDomainElements){ if(use_local_permute) colind_LID[j] = LocalPermuteIDs[colind_LID[j]]; // In the case where use_local_permute==false, we just copy the DomainMap's ordering, which it so happens // is what we put in colind to begin with. } else colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements]; } } return 0; }
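// The heart of the routine above is the single pass that classifies each
// column GID as local (present in the domain map) or remote, giving remote
// GIDs provisional LIDs of numDomainElements + k in first-seen order. A
// compact standalone rendering of that pass (std::unordered_map standing in
// for Epetra_HashTable; all names here are ours):
#include <unordered_map>
#include <vector>
#include <cstddef>

void classifyColumns(const std::vector<long long>& colGIDs,
                     const std::unordered_map<long long,int>& domainLID, // GID -> LID
                     int numDomainElements,
                     std::vector<int>& colLIDs,           // out: provisional LIDs
                     std::vector<long long>& remoteGIDs)  // out: remote GIDs, first-seen order
{
  std::unordered_map<long long,int> remoteIndex;
  colLIDs.resize(colGIDs.size());
  for (std::size_t j = 0; j < colGIDs.size(); ++j) {
    std::unordered_map<long long,int>::const_iterator loc = domainLID.find(colGIDs[j]);
    if (loc != domainLID.end()) {
      colLIDs[j] = loc->second;                 // local: reuse the domain-map LID
    }
    else {
      int k;
      std::unordered_map<long long,int>::iterator rem = remoteIndex.find(colGIDs[j]);
      if (rem == remoteIndex.end()) {
        k = (int)remoteGIDs.size();             // new remote GID
        remoteIndex[colGIDs[j]] = k;
        remoteGIDs.push_back(colGIDs[j]);
      }
      else {
        k = rem->second;                        // seen before
      }
      colLIDs[j] = numDomainElements + k;       // remotes numbered past all locals
    }
  }
}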
int powerMethodTests(Epetra_RowMatrix & A, Epetra_RowMatrix & JadA, Epetra_Map & Map, Epetra_Vector & q, Epetra_Vector & z, Epetra_Vector & resid, bool verbose) { // variable needed for iteration double lambda = 0.0; // int niters = 10000; int niters = 300; double tolerance = 1.0e-2; int ierr = 0; ///////////////////////////////////////////////////////////////////////////////////////////////// // Iterate Epetra_Time timer(Map.Comm()); double startTime = timer.ElapsedTime(); EPETRA_TEST_ERR(power_method(false, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); double elapsed_time = timer.ElapsedTime() - startTime; double total_flops = q.Flops(); double MFLOPs = total_flops/elapsed_time/1000000.0; double lambdaref = lambda; double flopsref = total_flops; if (verbose) cout << "\n\nTotal MFLOPs for reference first solve = " << MFLOPs << endl << "Total FLOPS = " <<total_flops <<endl<<endl; lambda = 0.0; startTime = timer.ElapsedTime(); EPETRA_TEST_ERR(power_method(false, JadA, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime() - startTime; total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for candidate first solve = " << MFLOPs << endl << "Total FLOPS = " <<total_flops <<endl<<endl; EPETRA_TEST_ERR(checkValues(lambda,lambdaref," No-transpose Power Method result", verbose),ierr); EPETRA_TEST_ERR(checkValues(total_flops,flopsref," No-transpose Power Method flop count", verbose),ierr); ///////////////////////////////////////////////////////////////////////////////////////////////// // Solve transpose problem if (verbose) cout << "\n\nUsing transpose of matrix and solving again (should give same result).\n\n" << endl; // Iterate lambda = 0.0; startTime = timer.ElapsedTime(); EPETRA_TEST_ERR(power_method(true, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime() - startTime; total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; lambdaref = lambda; flopsref = total_flops; if (verbose) cout << "\n\nTotal MFLOPs for reference transpose solve = " << MFLOPs << endl << "Total FLOPS = " <<total_flops <<endl<<endl; lambda = 0.0; startTime = timer.ElapsedTime(); EPETRA_TEST_ERR(power_method(true, JadA, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime() - startTime; total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for candidate transpose solve = " << MFLOPs << endl << "Total FLOPS = " <<total_flops <<endl<<endl; EPETRA_TEST_ERR(checkValues(lambda,lambdaref,"Transpose Power Method result", verbose),ierr); EPETRA_TEST_ERR(checkValues(total_flops,flopsref,"Transpose Power Method flop count", verbose),ierr); EPETRA_TEST_ERR(check(A, JadA, verbose),ierr); return(0); }
Epetra_CrsMatrix * Laplace2D::CreateLaplacian( const int nx, const int ny, const Epetra_Comm * Comm)
{
  int NumGlobalElements = nx * ny;

  // create a map
  Epetra_Map * Map = new Epetra_Map(NumGlobalElements,0,*Comm);

  // local number of rows
  int NumMyElements = Map->NumMyElements();
  // get update list
  int * MyGlobalElements = Map->MyGlobalElements();

  double hx = 1.0/(nx-1);
  double hy = 1.0/(ny-1);
  double off_left  = -1.0/(hx*hx);
  double off_right = -1.0/(hx*hx);
  double off_lower = -1.0/(hy*hy);
  double off_upper = -1.0/(hy*hy);
  double diag      =  2.0/(hx*hx) + 2.0/(hy*hy);

  int left, right, lower, upper;

  // 5 slightly overestimates the nonzeros per row (boundary rows have fewer)
  Epetra_CrsMatrix * A = new Epetra_CrsMatrix(Copy,*Map,5);

  // Add rows one-at-a-time
  double * Values = new double[4];
  int    * Indices = new int[4];

  for( int i = 0; i < NumMyElements; ++i )
  {
    int NumEntries=0;
    get_myNeighbours( MyGlobalElements[i], nx, ny, left, right, lower, upper );
    if( left != -1 )
    {
      Indices[NumEntries] = left;
      Values[NumEntries] = off_left;
      ++NumEntries;
    }
    if( right != -1 )
    {
      Indices[NumEntries] = right;
      Values[NumEntries] = off_right;
      ++NumEntries;
    }
    if( lower != -1 )
    {
      Indices[NumEntries] = lower;
      Values[NumEntries] = off_lower;
      ++NumEntries;
    }
    if( upper != -1 )
    {
      Indices[NumEntries] = upper;
      Values[NumEntries] = off_upper;
      ++NumEntries;
    }
    // put in the off-diagonal entries
    A->InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices);
    // put in the diagonal entry
    A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, MyGlobalElements+i);
  }

  // put matrix in local ordering
  A->FillComplete();

  delete [] Indices;
  delete [] Values;
  delete Map;

  return A;

} /* CreateLaplacian */
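// Usage sketch (assumes the surrounding Laplace2D class as above):
// CreateLaplacian returns an owning raw pointer, and the map it allocates
// internally is safe to delete there because Epetra objects reference-count
// their map data, so the caller only has to free the matrix.
#include "Epetra_CrsMatrix.h"
#include "Epetra_SerialComm.h"

void laplacianDemo()
{
  Epetra_SerialComm comm;
  Epetra_CrsMatrix* A = Laplace2D::CreateLaplacian(10, 10, &comm);
  // ... use A, e.g. A->Multiply(false, x, y) ...
  delete A;   // caller owns the matrix
}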
void LOCA::Epetra::AugmentedOp::buildExtendedMap(const Epetra_BlockMap& uMap, Epetra_Map*& eMapPtr, bool buildImporter, bool haveParam) { Epetra_BlockMap& nonconstUnderlyingMap = const_cast<Epetra_BlockMap&>(uMap); // Convert underlying map to point map if necessary Epetra_Map* uPointMapPtr = dynamic_cast<Epetra_Map*>(&nonconstUnderlyingMap); bool allocatedPointMap = false; if (uPointMapPtr == NULL) { allocatedPointMap = true; blockMap2PointMap(uMap, uPointMapPtr); } int max_gid = uPointMapPtr->MaxAllGID(); int num_global_elements = uPointMapPtr->NumGlobalElements(); int num_my_elements = uPointMapPtr->NumMyElements(); int *global_elements = uPointMapPtr->MyGlobalElements(); const Epetra_Comm& comm = uPointMapPtr->Comm(); int index_base = uPointMapPtr->IndexBase(); int ext_num_global_elements; int ext_num_my_elements; int *ext_global_elements; // Compute number of extended global elements if (buildImporter) ext_num_global_elements = num_global_elements + numConstraints*comm.NumProc(); else ext_num_global_elements = num_global_elements + numConstraints; // Compute number of extended local elements if (buildImporter || haveParam) ext_num_my_elements = num_my_elements + numConstraints; else ext_num_my_elements = num_my_elements; // Allocate extended global elements array ext_global_elements = new int[ext_num_my_elements]; // Set extended global elements for (int i=0; i<num_my_elements; i++) { ext_global_elements[i] = global_elements[i]; } if (buildImporter || haveParam) for (int i=0; i<numConstraints; i++) ext_global_elements[num_my_elements+i] = max_gid + 1 + i; // Create extended point map eMapPtr = new Epetra_Map(ext_num_global_elements, ext_num_my_elements, ext_global_elements, index_base, comm); // Free global elements array delete [] ext_global_elements; if (allocatedPointMap) delete uPointMapPtr; }
// ============================================================================
void EpetraExt::XMLWriter::
Write(const std::string& Label, const Epetra_Map& Map)
{
  TEUCHOS_TEST_FOR_EXCEPTION(IsOpen_ == false, std::logic_error,
                             "No file has been opened");

  long long NumGlobalElements = Map.NumGlobalElements64();
  const int* MyGlobalElements_int = 0;
  const long long* MyGlobalElements_LL = 0;
  Map.MyGlobalElements(MyGlobalElements_int, MyGlobalElements_LL);

  // Exactly one of the two views is non-null, depending on whether the map
  // uses 32-bit or 64-bit global indices; only throw if neither is set.
  if(!MyGlobalElements_int && !MyGlobalElements_LL)
    throw "EpetraExt::XMLWriter::Write: ERROR, GlobalIndices type unknown.";

  if (Comm_.MyPID() == 0) {
    std::ofstream of(FileName_.c_str(), std::ios::app);

    of << "<Map Label=\"" << Label
       << "\" NumElements=\"" << NumGlobalElements << '"'
       << " IndexBase=\"" << Map.IndexBase64() << '"'
       << " NumProc=\"" << Comm_.NumProc() << '"';

    of.close();
  }

  for (int iproc = 0; iproc < Comm_.NumProc(); ++iproc) {
    if (iproc == Comm_.MyPID()) {
      std::ofstream of(FileName_.c_str(), std::ios::app);
      of << " ElementsOnProc" << iproc << "=\"" << Map.NumMyElements() << '"';
      of.close();
    }
    Comm_.Barrier();
  }

  if (Comm_.MyPID() == 0) {
    std::ofstream of(FileName_.c_str(), std::ios::app);
    of << '>' << std::endl;
    of.close();
  }

  for (int iproc = 0; iproc < Comm_.NumProc(); iproc++) {
    if (iproc == Comm_.MyPID()) {
      std::ofstream of(FileName_.c_str(), std::ios::app);

      of << "<Proc ID=\"" << Comm_.MyPID() << "\">" << std::endl;

      if(MyGlobalElements_int) {
        for (int i = 0; i < Map.NumMyElements(); ++i) {
          of << MyGlobalElements_int[i] << std::endl;
        }
      }
      else {
        for (int i = 0; i < Map.NumMyElements(); ++i) {
          of << MyGlobalElements_LL[i] << std::endl;
        }
      }

      of << "</Proc>" << std::endl;
      of.close();
    }
    Comm_.Barrier();
  }

  if (Comm_.MyPID() == 0) {
    std::ofstream of(FileName_.c_str(), std::ios::app);
    of << "</Map>" << std::endl;
    of.close();
  }
}
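// A driving sketch for the writer above, hedged: it assumes the usual
// EpetraExt::XMLWriter open/close calls named Create() and Close(); check
// EpetraExt_XMLWriter.h for the exact interface.
#include "EpetraExt_XMLWriter.h"
#include "Epetra_Map.h"

void writeMapToXML(const Epetra_Comm& comm, const Epetra_Map& map)
{
  EpetraExt::XMLWriter writer(comm, "map.xml");
  writer.Create("MapData");     // open the file and write the header
  writer.Write("MyMap", map);   // the method defined above
  writer.Close();
}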
int main(int argc, char *argv[]) { int ierr = 0; double elapsed_time; double total_flops; double MFLOPs; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); Epetra_MpiComm comm( MPI_COMM_WORLD ); #else Epetra_SerialComm comm; #endif bool verbose = false; bool summary = false; // Check if we should print verbose results to standard out if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true; // Check if we should print verbose results to standard out if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true; if(argc < 6) { cerr << "Usage: " << argv[0] << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl << "where:" << endl << "NumNodesX - Number of mesh nodes in X direction per processor" << endl << "NumNodesY - Number of mesh nodes in Y direction per processor" << endl << "NumProcX - Number of processors to use in X direction" << endl << "NumProcY - Number of processors to use in Y direction" << endl << "NumPoints - Number of points to use in stencil (5, 9 or 25 only)" << endl << "-v|-s - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl << " Serial example:" << endl << argv[0] << " 16 12 1 1 25 -v" << endl << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl << " MPI example:" << endl << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl << " in the X direction and 8 in the Y direction. Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl << endl; return(1); } //char tmp; //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl; //if (comm.MyPID()==0) cin >> tmp; //comm.Barrier(); comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose && comm.MyPID()==0) cout << Epetra_Version() << endl << endl; if (summary && comm.MyPID()==0) { if (comm.NumProc()==1) cout << Epetra_Version() << endl << endl; else cout << endl << endl; // Print two blank line to keep output columns lined up } if (verbose) cout << comm <<endl; // Redefine verbose to only print on PE 0 if (verbose && comm.MyPID()!=0) verbose = false; if (summary && comm.MyPID()!=0) summary = false; int numNodesX = atoi(argv[1]); int numNodesY = atoi(argv[2]); int numProcsX = atoi(argv[3]); int numProcsY = atoi(argv[4]); int numPoints = atoi(argv[5]); if (verbose || (summary && comm.NumProc()==1)) { cout << " Number of local nodes in X direction = " << numNodesX << endl << " Number of local nodes in Y direction = " << numNodesY << endl << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl << " Number of local nonzero entries = " << numNodesX*numNodesY*numPoints << endl << " Number of global nonzero entries = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl << " Number of Processors in X direction = " << numProcsX << endl << " Number of Processors in Y direction = " << numProcsY << endl << " Number of Points in stencil = " << numPoints << endl << endl; } // Print blank line to keep output columns lined up if (summary && comm.NumProc()>1) cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl; if (numProcsX*numProcsY!=comm.NumProc()) { cerr << "Number of 
processors = " << comm.NumProc() << endl << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl; return(1); } if (numPoints!=5 && numPoints!=9 && numPoints!=25) { cerr << "Number of points specified = " << numPoints << endl << " is not 5, 9, 25" << endl << endl; return(1); } if (numNodesX*numNodesY<=0) { cerr << "Product of number of nodes is <= zero" << endl << endl; return(1); } Epetra_IntSerialDenseVector Xoff, XLoff, XUoff; Epetra_IntSerialDenseVector Yoff, YLoff, YUoff; if (numPoints==5) { // Generate a 5-point 2D Finite Difference matrix Xoff.Size(5); Yoff.Size(5); Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0; Xoff[4] = 0; Yoff[0] = 0; Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; // Generate a 2-point 2D Lower triangular Finite Difference matrix XLoff.Size(2); YLoff.Size(2); XLoff[0] = -1; XLoff[1] = 0; YLoff[0] = 0; YLoff[1] = -1; // Generate a 3-point 2D upper triangular Finite Difference matrix XUoff.Size(3); YUoff.Size(3); XUoff[0] = 0; XUoff[1] = 1; XUoff[2] = 0; YUoff[0] = 0; YUoff[1] = 0; YUoff[2] = 1; } else if (numPoints==9) { // Generate a 9-point 2D Finite Difference matrix Xoff.Size(9); Yoff.Size(9); Xoff[0] = -1; Xoff[1] = 0; Xoff[2] = 1; Yoff[0] = -1; Yoff[1] = -1; Yoff[2] = -1; Xoff[3] = -1; Xoff[4] = 0; Xoff[5] = 1; Yoff[3] = 0; Yoff[4] = 0; Yoff[5] = 0; Xoff[6] = -1; Xoff[7] = 0; Xoff[8] = 1; Yoff[6] = 1; Yoff[7] = 1; Yoff[8] = 1; // Generate a 5-point lower triangular 2D Finite Difference matrix XLoff.Size(5); YLoff.Size(5); XLoff[0] = -1; XLoff[1] = 0; Xoff[2] = 1; YLoff[0] = -1; YLoff[1] = -1; Yoff[2] = -1; XLoff[3] = -1; XLoff[4] = 0; YLoff[3] = 0; YLoff[4] = 0; // Generate a 4-point upper triangular 2D Finite Difference matrix XUoff.Size(4); YUoff.Size(4); XUoff[0] = 1; YUoff[0] = 0; XUoff[1] = -1; XUoff[2] = 0; XUoff[3] = 1; YUoff[1] = 1; YUoff[2] = 1; YUoff[3] = 1; } else { // Generate a 25-point 2D Finite Difference matrix Xoff.Size(25); Yoff.Size(25); int xi = 0, yi = 0; int xo = -2, yo = -2; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; // Generate a 13-point lower triangular 2D Finite Difference matrix XLoff.Size(13); YLoff.Size(13); xi = 0, yi = 0; xo = -2, yo = -2; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; xo = -2, yo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; xo = -2, yo++; XLoff[xi++] = 
xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; // Generate a 13-point upper triangular 2D Finite Difference matrix XUoff.Size(13); YUoff.Size(13); xi = 0, yi = 0; xo = 0, yo = 0; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; xo = -2, yo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; xo = -2, yo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; } Epetra_Map * map; Epetra_Map * mapL; Epetra_Map * mapU; Epetra_CrsMatrix * A; Epetra_CrsMatrix * L; Epetra_CrsMatrix * U; Epetra_MultiVector * b; Epetra_MultiVector * bt; Epetra_MultiVector * xexact; Epetra_MultiVector * bL; Epetra_MultiVector * btL; Epetra_MultiVector * xexactL; Epetra_MultiVector * bU; Epetra_MultiVector * btU; Epetra_MultiVector * xexactU; Epetra_SerialDenseVector resvec(0); //Timings Epetra_Flops flopcounter; Epetra_Time timer(comm); #ifdef EPETRA_VERY_SHORT_PERFTEST int jstop = 1; #elif EPETRA_SHORT_PERFTEST int jstop = 1; #else int jstop = 2; #endif for (int j=0; j<jstop; j++) { for (int k=1; k<17; k++) { #ifdef EPETRA_VERY_SHORT_PERFTEST if (k<3 || (k%4==0 && k<9)) { #elif EPETRA_SHORT_PERFTEST if (k<6 || k%4==0) { #else if (k<7 || k%2==0) { #endif int nrhs=k; if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with "; bool StaticProfile = (j!=0); if (verbose) if (StaticProfile) cout << " static profile\n"; else cout << " dynamic profile\n"; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary, map, A, b, bt, xexact, StaticProfile, false); #ifdef EPETRA_HAVE_JADMATRIX timer.ResetStartTime(); Epetra_JadMatrix JA(*A); elapsed_time = timer.ElapsedTime(); if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl; //cout << "A = " << *A << endl; //cout << "JA = " << JA << endl; runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary); #endif runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary); delete A; delete b; delete bt; delete xexact; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(), XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary, mapL, L, bL, btL, xexactL, StaticProfile, true); GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(), XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary, mapU, U, bU, btU, xexactU, StaticProfile, true); runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary); delete L; delete bL; delete btL; delete xexactL; delete mapL; delete U; delete bU; delete btU; delete xexactU; delete mapU; Epetra_MultiVector q(*map, nrhs); Epetra_MultiVector z(q); Epetra_MultiVector r(q); delete map; q.SetFlopCounter(flopcounter); z.SetFlopCounter(q); r.SetFlopCounter(q); resvec.Resize(nrhs); flopcounter.ResetFlops(); timer.ResetStartTime(); //10 norms for( int i = 0; i < 10; ++i ) q.Norm2( resvec.Values() ); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "Norm2" 
<< '\t'; cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 dot's for( int i = 0; i < 10; ++i ) q.Dot(z, resvec.Values()); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 Dot's = " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "DotProd" << '\t'; cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 dot's for( int i = 0; i < 10; ++i ) q.Update(1.0, z, 1.0, r, 0.0); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "Update" << '\t'; cout << MFLOPs << endl; } } } } #ifdef EPETRA_MPI MPI_Finalize() ; #endif return ierr ; } // Constructs a 2D PDE finite difference matrix using the list of x and y offsets. // // nx (In) - number of grid points in x direction // ny (In) - number of grid points in y direction // The total number of equations will be nx*ny ordered such that the x direction changes // most rapidly: // First equation is at point (0,0) // Second at (1,0) // ... // nx equation at (nx-1,0) // nx+1st equation at (0,1) // numPoints (In) - number of points in finite difference stencil // xoff (In) - stencil offsets in x direction (of length numPoints) // yoff (In) - stencil offsets in y direction (of length numPoints) // A standard 5-point finite difference stencil would be described as: // numPoints = 5 // xoff = [-1, 1, 0, 0, 0] // yoff = [ 0, 0, 0, -1, 1] // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed // comm (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID) // map (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors // A (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil // Off-diagonal values are random between 0 and 1. If diagonal is part of stencil, // diagonal will be slightly diag dominant. // b (Out) - Generated RHS. Values satisfy b = A*xexact // bt (Out) - Generated RHS. Values satisfy b = A'*xexact // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact // Note: Caller of this function is responsible for deleting all output objects. 
void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_Vector *& b, Epetra_Vector *& bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) { Epetra_MultiVector * b1, * bt1, * xexact1; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, xoff, yoff, 1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly); b = dynamic_cast<Epetra_Vector *>(b1); bt = dynamic_cast<Epetra_Vector *>(bt1); xexact = dynamic_cast<Epetra_Vector *>(xexact1); return; } void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, int nrhs, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_MultiVector *& b, Epetra_MultiVector *& bt, Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) { Epetra_Time timer(comm); // Determine my global IDs int * myGlobalElements; GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements); int numMyEquations = numNodesX*numNodesY; map = new Epetra_Map(-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning. delete [] myGlobalElements; int numGlobalEquations = map->NumGlobalElements(); int profile = 0; if (StaticProfile) profile = numPoints; #ifdef EPETRA_HAVE_STATICPROFILE if (MakeLocalOnly) A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap else A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix #else if (MakeLocalOnly) A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap else A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix #endif int * indices = new int[numPoints]; double * values = new double[numPoints]; double dnumPoints = (double) numPoints; int nx = numNodesX*numProcsX; for (int i=0; i<numMyEquations; i++) { int rowID = map->GID(i); int numIndices = 0; for (int j=0; j<numPoints; j++) { int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets if (colID>-1 && colID<numGlobalEquations) { indices[numIndices] = colID; double value = - ((double) rand())/ ((double) RAND_MAX); if (colID==rowID) values[numIndices++] = dnumPoints - value; // Make diagonal dominant else values[numIndices++] = value; } } //cout << "Building row " << rowID << endl; A->InsertGlobalValues(rowID, numIndices, values, indices); } delete [] indices; delete [] values; double insertTime = timer.ElapsedTime(); timer.ResetStartTime(); A->FillComplete(false); double fillCompleteTime = timer.ElapsedTime(); if (verbose) cout << "Time to insert matrix values = " << insertTime << endl << "Time to complete fill = " << fillCompleteTime << endl; if (summary) { if (comm.NumProc()==1) cout << "InsertTime" << '\t'; cout << insertTime << endl; if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t'; cout << fillCompleteTime << endl; } if (nrhs<=1) { b = new Epetra_Vector(*map); bt = new Epetra_Vector(*map); xexact = new Epetra_Vector(*map); } else { b = new Epetra_MultiVector(*map, nrhs); bt = new Epetra_MultiVector(*map, nrhs); xexact = new Epetra_MultiVector(*map, nrhs); } xexact->Random(); // Fill xexact with random values A->Multiply(false, *xexact, *b); A->Multiply(true, *xexact, *bt); return; }
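// An example call matching the 5-point stencil documented in the comment
// block above (our sketch; as noted there, the caller owns and must delete
// every output object):
void fivePointExample(const Epetra_Comm& comm, bool verbose)
{
  int xoff[5] = {-1, 1, 0,  0, 0};
  int yoff[5] = { 0, 0, 0, -1, 1};

  Epetra_Map* map; Epetra_CrsMatrix* A;
  Epetra_Vector *b, *bt, *xexact;
  GenerateCrsProblem(16, 16, 1, 1, 5, xoff, yoff,
                     comm, verbose, false /*summary*/,
                     map, A, b, bt, xexact,
                     true /*StaticProfile*/, false /*MakeLocalOnly*/);

  // ... exercise A, b, bt, xexact ...

  delete b; delete bt; delete xexact; delete A; delete map;
}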
int main(int argc, char *argv[]) { #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif int MyPID = Comm.MyPID(); bool verbose = true; if (MyPID==0) verbose = true; if (verbose) cout << EpetraExt::EpetraExt_Version() << endl << endl; cout << Comm << endl; if(argc < 2 && verbose) { cerr << "Usage: " << argv[0] << " HB_filename" << endl; return(1); } // Uncomment the next three lines to debug in mpi mode //int tmp; //if (MyPID==0) cin >> tmp; //Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact); // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, map); Epetra_CrsMatrix A(Copy, map, 0); Epetra_Vector x(map); Epetra_Vector b(map); Epetra_Vector xexact(map); Epetra_Time FillTimer(Comm); x.Export(*readx, exporter, Add); b.Export(*readb, exporter, Add); xexact.Export(*readxexact, exporter, Add); Comm.Barrier(); double vectorRedistributeTime = FillTimer.ElapsedTime(); A.Export(*readA, exporter, Add); Comm.Barrier(); double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime; assert(A.FillComplete()==0); Comm.Barrier(); double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime; if (Comm.MyPID()==0) { cout << "\n\n****************************************************" << endl; cout << "\n Vector redistribute time (sec) = " << vectorRedistributeTime<< endl; cout << " Matrix redistribute time (sec) = " << matrixRedistributeTime << endl; cout << " Transform to Local time (sec) = " << fillCompleteTime << endl<< endl; } Epetra_Vector tmp1(*readMap); Epetra_Vector tmp2(map); readA->Multiply(false, *readxexact, tmp1); A.Multiply(false, xexact, tmp2); double residual; tmp1.Norm2(&residual); if (verbose) cout << "Norm of Ax from file = " << residual << endl; tmp2.Norm2(&residual); if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl; //cout << "A from file = " << *readA << endl << endl << endl; //cout << "A after dist = " << A << endl << endl << endl; delete readA; delete readx; delete readb; delete readxexact; delete readMap; Comm.Barrier(); EpetraExt::RowMatrixToMatrixMarketFile("test.mm", A, "test matrix", "This is a test matrix"); #ifdef EPETRA_MPI MPI_Finalize() ; #endif return 0 ; }
/* Find the DBBD form */ int shylu_symbolic_factor ( Epetra_CrsMatrix *A, // i/p: A matrix shylu_symbolic *ssym, // symbolic structure shylu_data *data, // numeric structure, TODO: Required ? shylu_config *config // i/p: library configuration ) { #ifdef TIMING_OUTPUT Teuchos::Time symtime("symbolic time"); symtime.start(); #endif int myPID = A->Comm().MyPID(); int n = A->NumGlobalRows(); int Dnr; int Snr; int *DRowElems; int *SRowElems; int sym = config->sym; checkMaps(A); // Get column map Epetra_Map AColMap = A->ColMap(); int ncols = AColMap.NumMyElements(); int *cols = AColMap.MyGlobalElements(); // Get row map Epetra_Map ARowMap = A->RowMap(); int nrows = ARowMap.NumMyElements(); int *rows = ARowMap.MyGlobalElements(); // Find all columns in this proc int *gvals = new int[n]; // vector of size n, not ncols ! // gvals[local cols] = 1, gvals[shared cols] > 1. int SNumGlobalCols; findLocalColumns(A, gvals, SNumGlobalCols); // See if you can shrink the separator by assigning more rows/columns to // the block diagonals // TODO: This is because of a bug in coloring remove the if once that is // fixed //if (config->schurApproxMethod == 2) if (config->sep_type == 2) findNarrowSeparator(A, gvals); // 3. Assemble diagonal block and the border in convenient form [ /* In each processor, we have (in a permuted form) * | D_i C_i | * | R_i S_i | * D_i - diagonal block, C_i - Column Separator, R_i - Row separator * S_i - A22 block corresponding to Schur complement part of A * Assemble all four blocks in local matrices. */ ostringstream ssmsg1; ssmsg1 << "PID =" << myPID << " "; string msg = ssmsg1.str(); ssmsg1.clear(); ssmsg1.str(""); // Find #cols in each block int Dnc = 0; // #cols in diagonal block int Snc = 0; // #cols in the col. separator /* Looping on cols will work only for wide separator * as for narrow sep there will be some sep cols with gvals[col] ==1 * */ /*for (int i = 0; i < ncols ; i++) { if (gvals[cols[i]] == 1) Dnc++; else Snc++; } // Find #rows in each block Dnr = Dnc; // #rows in square diagonal block Snr = nrows - Dnr; // #rows in the row separator*/ // Find #rows in each block Dnr = 0; Snr = 0; for (int i = 0; i < nrows ; i++) { if (gvals[rows[i]] == 1) Dnr++; else Snr++; } Dnc = Dnr; // TODO: Snc is no longer useful, should remove it for (int i = 0; i < ncols ; i++) { if (gvals[cols[i]] != 1) Snc++; } assert(Snc >= 0); // TODO : The above assignment may not be correct in the unsymetric case ////config->dm.print(2, msg + " Mycols="); cout << msg << " Mycols="<< ncols << "Myrows ="<< nrows << endl; cout << msg << " #rows and #cols in diagonal blk ="<< Dnr << endl; cout << msg << " #columns in S ="<< Snc << endl; cout << msg << " #rows in S ="<< Snr << endl; ostringstream pidstr; pidstr << myPID ; // Create a row map for the D and S blocks [ DRowElems = new int[Dnr]; SRowElems = new int[Snr]; int gid; // Assemble row ids in two arrays (for D and R blocks) if (sym) { findBlockElems(A, nrows, rows, gvals, Dnr, DRowElems, Snr, SRowElems, "D"+pidstr.str()+"Rows", "S"+pidstr.str()+"Rows", false) ; } else { // SRowElems are not known until factorization, TODO assert(0 == 1); } data->Dnr = Dnr; data->Snr = Snr; data->Dnc = Dnc; data->DRowElems = DRowElems; data->SRowElems = SRowElems; // Create a column map for the D and S blocks [ int *DColElems = new int[Dnc]; // Elems in column map of D int *SColElems = new int[Snc]; // Elems in column map of C TODO: Unused // Assemble column ids in two arrays (for D and C blocks) findBlockElems(A, ncols, cols, gvals, Dnc, DColElems, Snc, SColElems, 
"D"+pidstr.str()+"Cols", "S"+pidstr.str()+"Cols", true) ; data->DColElems = DColElems; data->gvals = gvals; for (int i = 0; i < Snr; i++) { // Epetra guarentees columns corresponding to local rows will be first // in the column map. assert(SRowElems[i] == SColElems[i]); } // ] /*--Create the Epetra Matrices with the maps (does not insert values) --- */ create_matrices(A, ssym, data, config); /*--Extract the Epetra Matrices and call fillComplete --- */ extract_matrices(A, ssym, data, config, true); delete[] SColElems; Amesos Factory; const char* SolverType = config->diagonalBlockSolver.c_str(); bool IsAvailable = Factory.Query(SolverType); assert(IsAvailable == true); Teuchos::RCP<Epetra_LinearProblem> LP = Teuchos::RCP<Epetra_LinearProblem> (new Epetra_LinearProblem()); LP->SetOperator((ssym->D).getRawPtr()); //LP->SetOperator((ssym->DT).getRawPtr()); // for transpose // Create temp vectors ssym->Dlhs = Teuchos::RCP<Epetra_MultiVector> (new Epetra_MultiVector(ssym->D->RowMap(), 16)); ssym->Drhs = Teuchos::RCP<Epetra_MultiVector> (new Epetra_MultiVector(ssym->D->RowMap(), 16)); ssym->Gvec = Teuchos::RCP<Epetra_MultiVector> (new Epetra_MultiVector(ssym->G->RowMap(), 16)); LP->SetRHS(ssym->Drhs.getRawPtr()); LP->SetLHS(ssym->Dlhs.getRawPtr()); ssym->ReIdx_LP = Teuchos::RCP< EpetraExt::ViewTransform<Epetra_LinearProblem> > (new EpetraExt::LinearProblem_Reindex2(0)); ssym->LP = Teuchos::RCP<Epetra_LinearProblem>(&((*(ssym->ReIdx_LP))(*LP)), false); Teuchos::RCP<Amesos_BaseSolver> Solver = Teuchos::RCP<Amesos_BaseSolver> (Factory.Create(SolverType, *(ssym->LP))); //config->dm.print(5, "Created the diagonal solver"); #ifdef TIMING_OUTPUT Teuchos::Time ftime("setup time"); ftime.start(); #endif //Solver->SetUseTranspose(true); // for transpose Teuchos::ParameterList aList; aList.set("TrustMe", true); Solver->SetParameters(aList); Solver->SymbolicFactorization(); //config->dm.print(3, "Symbolic Factorization done"); #ifdef TIMING_OUTPUT ftime.stop(); cout << "Symbolic Factorization Time" << ftime.totalElapsedTime() << endl; ftime.reset(); #endif ssym->OrigLP = LP; //ssym->LP = LP; ssym->Solver = Solver; if (config->schurApproxMethod == 1) { Teuchos::ParameterList pList; Teuchos::RCP<Isorropia::Epetra::Prober> prober = Teuchos::RCP<Isorropia::Epetra::Prober> (new Isorropia::Epetra::Prober((ssym->Sg).getRawPtr(), pList, false)); //config->dm.print(3, "Doing Coloring"); #ifdef TIMING_OUTPUT ftime.start(); #endif prober->color(); #ifdef TIMING_OUTPUT ftime.stop(); cout << "Time to color" << ftime.totalElapsedTime() << endl; ftime.reset(); ftime.start(); #endif ssym->prober = prober; } #ifdef TIMING_OUTPUT symtime.stop(); cout << "Symbolic Time" << symtime.totalElapsedTime() << endl; symtime.reset(); #endif }
int CreateTridi(Epetra_CrsMatrix& A) { Epetra_Map Map = A.RowMap(); int NumMyElements = Map.NumMyElements(); int NumGlobalElements = Map.NumGlobalElements(); int * MyGlobalElements = new int[NumMyElements]; Map.MyGlobalElements(MyGlobalElements); // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1 double *Values = new double[3]; int *Indices = new int[3]; int NumEntries; for (int i=0; i<NumMyElements; i++) { if (MyGlobalElements[i]==0) { Indices[0] = 0; Indices[1] = 1; Values[0] = 2.0; Values[1] = -1.0; NumEntries = 2; } else if (MyGlobalElements[i] == NumGlobalElements-1) { Indices[0] = NumGlobalElements-1; Indices[1] = NumGlobalElements-2; Values[0] = 2.0; Values[1] = -1.0; NumEntries = 2; } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]; Indices[2] = MyGlobalElements[i]+1; Values[0] = -1.0; Values[1] = 2.0; Values[2] = -1.0; NumEntries = 3; } assert(A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices)==0); // Put in the diagonal entry // assert(A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i])==0); } // Finish up assert(A.FillComplete()==0); delete[] MyGlobalElements; delete[] Values; delete[] Indices; return 0; }
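// Usage sketch for CreateTridi() (illustrative only; assumes the usual
// Epetra headers plus <cmath>, and an Epetra_Comm named Comm): build the
// tridiagonal [-1 2 -1] matrix on a uniform linear map and sanity-check it
// with a matvec. For x = all ones, every interior row sums to zero
// (-1 + 2 - 1), so only the two boundary rows (2 - 1 = 1 each) survive,
// giving ||y||_1 == 2.
int CreateTridiExample(const Epetra_Comm& Comm)
{
  Epetra_Map Map(100, 0, Comm);      // 100 global rows, linear layout
  Epetra_CrsMatrix A(Copy, Map, 3);  // at most 3 entries per row
  int err = CreateTridi(A);
  if (err != 0) return err;

  Epetra_Vector x(Map), y(Map);
  x.PutScalar(1.0);
  A.Multiply(false, x, y);           // y = A*x

  double norm1;
  y.Norm1(&norm1);
  return (std::fabs(norm1 - 2.0) < 1.e-12) ? 0 : -1;
}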
int Drumm3(const Epetra_Map& map, bool verbose) { const Epetra_Comm & Comm = map.Comm(); /* get number of processors and the name of this processor */ int Numprocs = Comm.NumProc(); int MyPID = Comm.MyPID(); if (Numprocs != 2) return(0); int NumGlobalRows = 4; int IndexBase = 0; Epetra_Map Map(NumGlobalRows, IndexBase, Comm); // Construct FECrsMatrix int NumEntriesPerRow = 3; Epetra_FECrsMatrix A(Copy, Map, NumEntriesPerRow); double ElementArea = 0.5; int NumCols = 3; int* Indices = new int[NumCols]; if(MyPID==0) // indices corresponding to element 0 on processor 0 { Indices[0] = 0; Indices[1] = 1; Indices[2] = 3; } else if(MyPID==1) // indices corresponding to element 1 on processor 1 { Indices[0] = 1; Indices[1] = 2; Indices[2] = 3; } double* Values = new double[NumCols*NumCols]; // removal term Values[0] = 2*ElementArea/12.; Values[1] = 1*ElementArea/12.; Values[2] = 1*ElementArea/12.; Values[3] = 1*ElementArea/12.; Values[4] = 2*ElementArea/12.; Values[5] = 1*ElementArea/12.; Values[6] = 1*ElementArea/12.; Values[7] = 1*ElementArea/12.; Values[8] = 2*ElementArea/12.; A.InsertGlobalValues(NumCols, Indices, Values, Epetra_FECrsMatrix::ROW_MAJOR); A.GlobalAssemble(); A.GlobalAssemble(); // A.Print(cout); // Create vectors for CG algorithm Epetra_FEVector* bptr = new Epetra_FEVector(A.RowMap(), 1); Epetra_FEVector* x0ptr = new Epetra_FEVector(A.RowMap(), 1); Epetra_FEVector& b = *bptr; Epetra_FEVector& x0 = *x0ptr; // source terms NumCols = 2; if(MyPID==0) // indices corresponding to element 0 on processor 0 { Indices[0] = 0; Indices[1] = 3; Values[0] = 1./2.; Values[1] = 1./2.; } else { Indices[0] = 1; Indices[1] = 2; Values[0] = 0; Values[1] = 0; } b.SumIntoGlobalValues(NumCols, Indices, Values); b.GlobalAssemble(); if (verbose&&MyPID==0) cout << "b:" << endl; if (verbose) { b.Print(cout); } x0 = b; if (verbose&&MyPID==0) { cout << "x:"<<endl; } if (verbose) { x0.Print(cout); } delete [] Values; delete [] Indices; delete bptr; delete x0ptr; return(0); }
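// The nine Values filled in Drumm3() form the consistent mass matrix of a
// linear triangle, M = (Area/12) * [2 1 1; 1 2 1; 1 1 2]: integrating
// products of the three linear basis functions over the element gives
// Area/6 on the diagonal and Area/12 off it, i.e. the 2/1 pattern once
// Area/12 is factored out. A small helper that fills such an element
// matrix (a sketch, not part of the original test):
static void FillTriangleMassMatrix(double ElementArea,
                                   double* M) // 9 entries, row-major
{
  for (int i = 0; i < 3; ++i)
    for (int j = 0; j < 3; ++j)
      M[3*i + j] = ((i == j) ? 2.0 : 1.0) * ElementArea / 12.0;
}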
int checkmap(Epetra_Map & Map, int NumGlobalElements, int NumMyElements, int *MyGlobalElements, int IndexBase, Epetra_Comm& Comm, bool DistributedGlobal) { int i, ierr=0, forierr = 0; EPETRA_TEST_ERR(!Map.ConstantElementSize(),ierr); EPETRA_TEST_ERR(DistributedGlobal!=Map.DistributedGlobal(),ierr); EPETRA_TEST_ERR(Map.ElementSize()!=1,ierr); int *MyElementSizeList = new int[NumMyElements]; EPETRA_TEST_ERR(Map.ElementSizeList(MyElementSizeList)!=0,ierr); forierr = 0; for (i=0; i<NumMyElements; i++) forierr += MyElementSizeList[i]!=1; EPETRA_TEST_ERR(forierr,ierr); delete [] MyElementSizeList; const Epetra_Comm & Comm1 = Map.Comm(); EPETRA_TEST_ERR(Comm1.NumProc()!=Comm.NumProc(),ierr); EPETRA_TEST_ERR(Comm1.MyPID()!=Comm.MyPID(),ierr); EPETRA_TEST_ERR(Map.IndexBase()!=IndexBase,ierr); EPETRA_TEST_ERR(!Map.LinearMap() && MyGlobalElements==0,ierr); EPETRA_TEST_ERR(Map.LinearMap() && MyGlobalElements!=0,ierr); EPETRA_TEST_ERR(Map.MaxAllGID()!=NumGlobalElements-1+IndexBase,ierr); EPETRA_TEST_ERR(Map.MaxElementSize()!=1,ierr); int MaxLID = Map.MaxLID(); EPETRA_TEST_ERR(MaxLID!=NumMyElements-1,ierr); int MaxMyGID = (Comm.MyPID()+1)*NumMyElements-1+IndexBase; if (Comm.MyPID()>2) MaxMyGID+=3; if (!DistributedGlobal) MaxMyGID = NumMyElements-1+IndexBase; EPETRA_TEST_ERR(Map.MaxMyGID()!=MaxMyGID,ierr); EPETRA_TEST_ERR(Map.MinAllGID()!=IndexBase,ierr); EPETRA_TEST_ERR(Map.MinElementSize()!=1,ierr); EPETRA_TEST_ERR(Map.MinLID()!=0,ierr); int MinMyGID = Comm.MyPID()*NumMyElements+IndexBase; if (Comm.MyPID()>2) MinMyGID+=3; if (!DistributedGlobal) MinMyGID = 0; EPETRA_TEST_ERR(Map.MinMyGID()!=MinMyGID,ierr); int * MyGlobalElements1 = new int[NumMyElements]; EPETRA_TEST_ERR(Map.MyGlobalElements(MyGlobalElements1)!=0,ierr); forierr = 0; if (MyGlobalElements==0) { for (i=0; i<NumMyElements; i++) forierr += MyGlobalElements1[i]!=MinMyGID+i; EPETRA_TEST_ERR(forierr,ierr); } else { for (i=0; i<NumMyElements; i++) forierr += MyGlobalElements[i]!=MyGlobalElements1[i]; EPETRA_TEST_ERR(forierr,ierr); } EPETRA_TEST_ERR(Map.NumGlobalElements()!=NumGlobalElements,ierr); EPETRA_TEST_ERR(Map.NumGlobalPoints()!=NumGlobalElements,ierr); EPETRA_TEST_ERR(Map.NumMyElements()!=NumMyElements,ierr); EPETRA_TEST_ERR(Map.NumMyPoints()!=NumMyElements,ierr); int MaxMyGID2 = Map.GID(Map.LID(MaxMyGID)); EPETRA_TEST_ERR(MaxMyGID2 != MaxMyGID,ierr); int MaxLID2 = Map.LID(Map.GID(MaxLID)); EPETRA_TEST_ERR(MaxLID2 != MaxLID,ierr); EPETRA_TEST_ERR(Map.GID(MaxLID+1) != IndexBase-1,ierr);// MaxLID+1 doesn't exist EPETRA_TEST_ERR(Map.LID(MaxMyGID+1) != -1,ierr);// MaxMyGID+1 doesn't exist or is on a different processor EPETRA_TEST_ERR(!Map.MyGID(MaxMyGID),ierr); EPETRA_TEST_ERR(Map.MyGID(MaxMyGID+1),ierr); EPETRA_TEST_ERR(!Map.MyLID(MaxLID),ierr); EPETRA_TEST_ERR(Map.MyLID(MaxLID+1),ierr); EPETRA_TEST_ERR(!Map.MyGID(Map.GID(MaxLID)),ierr); EPETRA_TEST_ERR(Map.MyGID(Map.GID(MaxLID+1)),ierr); EPETRA_TEST_ERR(!Map.MyLID(Map.LID(MaxMyGID)),ierr); EPETRA_TEST_ERR(Map.MyLID(Map.LID(MaxMyGID+1)),ierr); // Check RemoteIDList function // Get some GIDs off of each processor to test int TotalNumEle, NumElePerProc, NumProc = Comm.NumProc(); int MinNumEleOnProc; int NumMyEle=Map.NumMyElements(); Comm.MinAll(&NumMyEle,&MinNumEleOnProc,1); if (MinNumEleOnProc > 5) NumElePerProc = 6; else NumElePerProc = MinNumEleOnProc; if (NumElePerProc > 0) { TotalNumEle = NumElePerProc*NumProc; int * MyGIDlist = new int[NumElePerProc]; int * GIDlist = new int[TotalNumEle]; int * PIDlist = new int[TotalNumEle]; int * LIDlist = new int[TotalNumEle]; for (i=0; 
i<NumElePerProc; i++) MyGIDlist[i] = MyGlobalElements1[i]; Comm.GatherAll(MyGIDlist,GIDlist,NumElePerProc);// Get a few values from each proc Map.RemoteIDList(TotalNumEle, GIDlist, PIDlist, LIDlist); int MyPID= Comm.MyPID(); forierr = 0; for (i=0; i<TotalNumEle; i++) { if (Map.MyGID(GIDlist[i])) { forierr += PIDlist[i] != MyPID; forierr += !Map.MyLID(Map.LID(GIDlist[i])) || Map.LID(GIDlist[i]) != LIDlist[i] || Map.GID(LIDlist[i]) != GIDlist[i]; } else { forierr += PIDlist[i] == MyPID; // If MyGID comes back false, the PID listed should be that of another proc } } EPETRA_TEST_ERR(forierr,ierr); delete [] MyGIDlist; delete [] GIDlist; delete [] PIDlist; delete [] LIDlist; } delete [] MyGlobalElements1; // Check RemoteIDList function (assumes all maps are linear, even if not stored that way) if (Map.LinearMap()) { int * GIDList = new int[3]; int * PIDList = new int[3]; int * LIDList = new int[3]; int MyPID = Map.Comm().MyPID(); int NumIDs = 0; //GIDList[NumIDs++] = Map.MaxAllGID()+1; // Should return -1 for both PID and LID if (Map.MinMyGID()-1>=Map.MinAllGID()) GIDList[NumIDs++] = Map.MinMyGID()-1; if (Map.MaxMyGID()+1<=Map.MaxAllGID()) GIDList[NumIDs++] = Map.MaxMyGID()+1; Map.RemoteIDList(NumIDs, GIDList, PIDList, LIDList); NumIDs = 0; //EPETRA_TEST_ERR(!(PIDList[NumIDs]==-1),ierr); //EPETRA_TEST_ERR(!(LIDList[NumIDs++]==-1),ierr); if (Map.MinMyGID()-1>=Map.MinAllGID()) EPETRA_TEST_ERR(!(PIDList[NumIDs++]==MyPID-1),ierr); if (Map.MaxMyGID()+1<=Map.MaxAllGID()) EPETRA_TEST_ERR(!(PIDList[NumIDs]==MyPID+1),ierr); if (Map.MaxMyGID()+1<=Map.MaxAllGID()) EPETRA_TEST_ERR(!(LIDList[NumIDs++]==0),ierr); delete [] GIDList; delete [] PIDList; delete [] LIDList; } return (ierr); }
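// Usage sketch for checkmap() (illustrative only): the MinMyGID/MaxMyGID
// formulas inside checkmap() bake in the particular element distribution
// used by its original driver, so this sketch restricts itself to a single
// process, where the !DistributedGlobal branch applies. Passing
// MyGlobalElements as 0 selects the LinearMap() code path. Assumes an
// Epetra_Comm named Comm is in scope.
int checkmapExample(Epetra_Comm& Comm)
{
  if (Comm.NumProc() != 1) return 0; // sketch only exercises the serial case

  int NumGlobalElements = 10;
  int IndexBase = 0;
  Epetra_Map Map(NumGlobalElements, IndexBase, Comm);
  return checkmap(Map, NumGlobalElements, Map.NumMyElements(),
                  0 /*MyGlobalElements*/, IndexBase, Comm,
                  false /*DistributedGlobal*/);
}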
int Drumm1(const Epetra_Map& map, bool verbose) { (void)verbose; //Simple 2-element problem (element as in "finite-element") from //Clif Drumm. Two triangular elements, one per processor, as shown //here: // // *----* // 3|\ 2| // | \ | // | 0\1| // | \| // *----* // 0 1 // //Element 0 on processor 0, element 1 on processor 1. //Processor 0 will own nodes 0,1 and processor 1 will own nodes 2,3. //Each processor will pass a 3x3 element-matrix to Epetra_FECrsMatrix. //After GlobalAssemble(), the matrix should be as follows: // // row 0: 2 1 0 1 //proc 0 row 1: 1 4 1 2 //---------------------------------- // row 2: 0 1 2 1 //proc 1 row 3: 1 2 1 4 // int numProcs = map.Comm().NumProc(); int localProc = map.Comm().MyPID(); if (numProcs != 2) return(0); //so first we'll set up a epetra_test::matrix_data object with //contents that match the above-described matrix. (but the //matrix_data object will have all 4 rows on each processor) int i; int rowlengths[4]; rowlengths[0] = 3; rowlengths[1] = 4; rowlengths[2] = 3; rowlengths[3] = 4; epetra_test::matrix_data matdata(4, rowlengths); for(i=0; i<4; ++i) { for(int j=0; j<matdata.rowlengths()[i]; ++j) { matdata.colindices()[i][j] = j; } } matdata.colindices()[0][2] = 3; matdata.colindices()[2][0] = 1; matdata.colindices()[2][1] = 2; matdata.colindices()[2][2] = 3; double** coefs = matdata.coefs(); coefs[0][0] = 2.0; coefs[0][1] = 1.0; coefs[0][2] = 1.0; coefs[1][0] = 1.0; coefs[1][1] = 4.0; coefs[1][2] = 1.0; coefs[1][3] = 2.0; coefs[2][0] = 1.0; coefs[2][1] = 2.0; coefs[2][2] = 1.0; coefs[3][0] = 1.0; coefs[3][1] = 2.0; coefs[3][2] = 1.0; coefs[3][3] = 4.0; //now we'll load a Epetra_FECrsMatrix with data that matches the //above-described finite-element problem. int indexBase = 0, ierr = 0; int myNodes[4]; double values[9]; values[0] = 2.0; values[1] = 1.0; values[2] = 1.0; values[3] = 1.0; values[4] = 2.0; values[5] = 1.0; values[6] = 1.0; values[7] = 1.0; values[8] = 2.0; int numMyNodes = 2; if (localProc == 0) { myNodes[0] = 0; myNodes[1] = 1; } else { myNodes[0] = 2; myNodes[1] = 3; } Epetra_Map Map(-1, numMyNodes, myNodes, indexBase, map.Comm()); numMyNodes = 3; if (localProc == 0) { myNodes[0] = 0; myNodes[1] = 1; myNodes[2] = 3; } else { myNodes[0] = 1; myNodes[1] = 2; myNodes[2] = 3; } int rowLengths = 3; Epetra_FECrsMatrix A(Copy, Map, rowLengths); EPETRA_TEST_ERR( A.InsertGlobalValues(numMyNodes, myNodes, numMyNodes, myNodes, values, Epetra_FECrsMatrix::ROW_MAJOR),ierr); EPETRA_TEST_ERR( A.GlobalAssemble(), ierr ); EPETRA_TEST_ERR( A.GlobalAssemble(), ierr ); //now the test is to check whether the FECrsMatrix data matches the //epetra_test::matrix_data object... bool the_same = matdata.compare_local_data(A); if (!the_same) { return(-1); } return(0); }
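// Spot-check sketch for the matrix assembled in Drumm1() (not part of the
// original test): rows 1 and 3 lie on the shared edge of the two
// triangles, so their diagonal receives 2.0 from each element, totalling
// the 4.0 shown in the comment block above. The helper below pulls row 1
// out of the matrix and verifies that sum on whichever processor owns it.
static int checkSharedRowSum(const Epetra_FECrsMatrix& A)
{
  if (!A.RowMap().MyGID(1)) return 0; // row 1 lives on the other processor

  double rowValues[4];
  int rowIndices[4];
  int numEntries = 0;
  if (A.ExtractGlobalRowCopy(1, 4, numEntries, rowValues, rowIndices) != 0)
    return -1;

  for (int k = 0; k < numEntries; ++k)
    if (rowIndices[k] == 1 && rowValues[k] != 4.0)
      return -1; // both element contributions should have been summed
  return 0;
}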
int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else Epetra_SerialComm Comm; #endif // My MPI process rank. const int MyPID = Comm.MyPID(); // "/Users/sakashitatatsuya/Downloads/barista_trunk_slepc/sample/hamiltonian_matrix.ip" std::ifstream ifs(argv[1]); alps::Parameters params(ifs); Teuchos::oblackholestream blackHole; std::ostream& out = (MyPID == 0) ? std::cout : blackHole; barista::Hamiltonian<> hamiltonian(params); matrix_type matrix(hamiltonian.dimension(), hamiltonian.dimension()); hamiltonian.fill<double>(matrix); int m,n; int N; m = n = N = hamiltonian.dimension(); //std::cout << matrix << std::endl; std::ofstream ofs; if (MyPID==0) { ofs.open("anasazi_time.txt"); if (!ofs) { #ifdef HAVE_MPI MPI_Finalize() ; #endif return -1; } } //Teuchos::ParameterList GaleriList; using Teuchos::RCP; using Teuchos::rcp; typedef Teuchos::ScalarTraits<double> STS; const double one = STS::one(); const double zero = STS::zero(); // The problem is defined on a 2D grid, global size is nx * nx. //int nx = N; //GaleriList.set("n", nx * nx); //GaleriList.set("nx", nx); //GaleriList.set("ny", nx); //Teuchos::RCP<Epetra_Map> Map = Teuchos::rcp( Galeri::CreateMap("Linear", Comm, GaleriList) ); //Teuchos::RCP<Epetra_RowMatrix> A = Teuchos::rcp( Galeri::CreateCrsMatrix("Laplace2D", &*Map, GaleriList) ); // Construct a Map that puts approximately the same number of rows // of the matrix A on each processor. Epetra_Map RowMap (N, 0, Comm); Epetra_Map ColMap (N, 0, Comm); // Get update list and number of local equations from newly created Map. const int NumMyRowElements = RowMap.NumMyElements (); std::vector<int> MyGlobalRowElements (NumMyRowElements); RowMap.MyGlobalElements (&MyGlobalRowElements[0]); // Create an Epetra_CrsMatrix using the given row map. RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Copy, RowMap, n)); // We use info to catch any errors that may have happened during // matrix assembly, and report them globally. We do this so that // the MPI processes won't call FillComplete() unless they all // successfully filled their parts of the matrix. int info = 0; try { // // Compute coefficients for the discrete integral operator. // std::vector<double> Values (n); std::vector<int> Indices (n); //const double inv_mp1 = one / (m+1); //const double inv_np1 = one / (n+1); int count; //for (int i = 0; i < n; ++i) { // Indices[i] = i; //} for (int i = 0; i < NumMyRowElements; ++i) { count =0; for (int j = 0; j < n; ++j) { if (matrix(MyGlobalRowElements[i],j)!=0) { Values[count] = matrix(MyGlobalRowElements[i],j); Indices[count] = j; count++; } } info = A->InsertGlobalValues (MyGlobalRowElements[i], count, &Values[0], &Indices[0]); // Make sure that the insertion succeeded. Teuchos' // TEST_FOR_EXCEPTION macro gives a nice error message if the // thrown exception isn't caught. We'll report this on the // offending MPI process. /* TEST_FOR_EXCEPTION( info != 0, std::runtime_error, "Failed to insert n=" << n << " global value" << (n != 1 ? "s" : "") << " in row " << MyGlobalRowElements[i] << " of the matrix." ); */ } // for i = 0... // Call FillComplete on the matrix. Since the matrix isn't square, // we have to give FillComplete the domain and range maps, which in // this case are the column resp. row maps. 
info = A->FillComplete (ColMap, RowMap); /* TEST_FOR_EXCEPTION( info != 0, std::runtime_error, "FillComplete failed with INFO = " << info << "."); */ info = A->OptimizeStorage(); /* TEST_FOR_EXCEPTION( info != 0, std::runtime_error, "OptimizeStorage failed with INFO = " << info << "."); */ } catch (std::runtime_error& e) { // If multiple MPI processes are reporting errors, sometimes // forming the error message as a string and then writing it to // the output stream prevents messages from different processes // from being interleaved. std::ostringstream os; os << "*** Error on MPI process " << MyPID << ": " << e.what(); cerr << os.str() << endl; if (info == 0) info = -1; // All procs will share info later on. } // Variables used for the Block Davidson Method const int nev = 5; const int blockSize = 5; const int numBlocks = 8; const int maxRestarts = 500; const double tol = 1.0e-8; typedef Epetra_MultiVector MV; typedef Epetra_Operator OP; typedef Anasazi::MultiVecTraits<double, Epetra_MultiVector> MVT; // Create an Epetra_MultiVector for an initial vector to start the solver. // Note: This needs to have the same number of columns as the blocksize. // //Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(*Map, blockSize) ); Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(ColMap, blockSize) ); ivec->Random(); // Create the eigenproblem. Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > problem = Teuchos::rcp( new Anasazi::BasicEigenproblem<double, MV, OP>(A, ivec) ); // Inform the eigenproblem that the operator A is symmetric problem->setHermitian(true); // Set the number of eigenvalues requested problem->setNEV( nev ); // Inform the eigenproblem that you are finishing passing it information bool boolret = problem->setProblem(); if (boolret != true) { std::cout<<"Anasazi::BasicEigenproblem::setProblem() returned an error." << std::endl; #ifdef HAVE_MPI MPI_Finalize(); #endif return -1; } // Create parameter list to pass into the solver manager Teuchos::ParameterList anasaziPL; anasaziPL.set( "Which", "LM" ); anasaziPL.set( "Block Size", blockSize ); anasaziPL.set( "Maximum Iterations", 500 ); anasaziPL.set( "Convergence Tolerance", tol ); anasaziPL.set( "Verbosity", Anasazi::Errors+Anasazi::Warnings+Anasazi::TimingDetails+Anasazi::FinalSummary ); // Create the solver manager Anasazi::LOBPCGSolMgr<double, MV, OP> anasaziSolver(problem, anasaziPL); // Solve the problem double start, end; MPI_Barrier(MPI_COMM_WORLD); start = MPI_Wtime(); Anasazi::ReturnType returnCode = anasaziSolver.solve(); MPI_Barrier(MPI_COMM_WORLD); end = MPI_Wtime(); // Get the eigenvalues and eigenvectors from the eigenproblem Anasazi::Eigensolution<double,MV> sol = problem->getSolution(); std::vector<Anasazi::Value<double> > evals = sol.Evals; Teuchos::RCP<MV> evecs = sol.Evecs; // Compute residuals. std::vector<double> normR(sol.numVecs); Teuchos::SerialDenseMatrix<int,double> T(sol.numVecs, sol.numVecs); Epetra_MultiVector tempAevec( ColMap, sol.numVecs ); T.putScalar(0.0); for (int i=0; i<sol.numVecs; i++) { T(i,i) = evals[i].realpart; } A->Apply( *evecs, tempAevec ); MVT::MvTimesMatAddMv( -1.0, *evecs, T, 1.0, tempAevec ); MVT::MvNorm( tempAevec, normR ); if (MyPID == 0) { // Print the results std::cout<<"Solver manager returned " << (returnCode == Anasazi::Converged ? "converged." 
: "unconverged.") << std::endl; std::cout<<std::endl; std::cout<<"------------------------------------------------------"<<std::endl; std::cout<<std::setw(16)<<"Eigenvalue" <<std::setw(18)<<"Direct Residual" <<std::endl; std::cout<<"------------------------------------------------------"<<std::endl; for (int i=0; i<sol.numVecs; i++) { std::cout<<std::setw(16)<<evals[i].realpart <<std::setw(18)<<normR[i]/evals[i].realpart <<std::endl; } std::cout<<"------------------------------------------------------"<<std::endl; } // Print out the map and matrices //ColMap.Print (out); //A->Print (cout); //RowMap.Print (cout); double time; int iter; if (MyPID==0) { iter = anasaziSolver.getNumIters(); Teuchos::Array<Teuchos::RCP<Teuchos::Time> > timer = anasaziSolver.getTimers(); Teuchos::RCP<Teuchos::Time> _timerSolve = timer[0]; cout << "timerSolve=" << _timerSolve << endl; time = end - start; cout << "time=" << time << endl; ofs << "time=" << time << endl; cout << "iter=" << iter << endl; ofs << "iter=" << iter << endl; } #ifdef HAVE_MPI MPI_Finalize() ; #endif return 0; }
int main(int argc, char *argv[]) { #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif cout << Comm << endl; int MyPID = Comm.MyPID(); bool verbose = false; bool verbose1 = true; if (MyPID==0) verbose = true; if(argc < 2 && verbose) { cerr << "Usage: " << argv[0] << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl << "where:" << endl << "HB_filename - filename and path of a Harwell-Boeing data set" << endl << "level_fill - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl << "level_overlap - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl << "To specify a non-default value for one of these parameters, you must specify all" << endl << " preceding values but not any subsequent parameters. Example:" << endl << "ifpackHpcSerialMsr.exe mymatrix.hpc 1 - loads mymatrix.hpc, uses level fill of one, all other values are defaults" << endl << endl; return(1); } // Uncomment the next three lines to debug in mpi mode //int tmp; //if (MyPID==0) cin >> tmp; //Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact); // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, map); Epetra_CrsMatrix A(Copy, map, 0); Epetra_Vector x(map); Epetra_Vector b(map); Epetra_Vector xexact(map); Epetra_Time FillTimer(Comm); x.Export(*readx, exporter, Add); b.Export(*readb, exporter, Add); xexact.Export(*readxexact, exporter, Add); Comm.Barrier(); double vectorRedistributeTime = FillTimer.ElapsedTime(); A.Export(*readA, exporter, Add); Comm.Barrier(); double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime; assert(A.FillComplete()==0); Comm.Barrier(); double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime; if (Comm.MyPID()==0) { cout << "\n\n****************************************************" << endl; cout << "\n Vector redistribute time (sec) = " << vectorRedistributeTime<< endl; cout << " Matrix redistribute time (sec) = " << matrixRedistributeTime << endl; cout << " Transform to Local time (sec) = " << fillCompleteTime << endl<< endl; } Epetra_Vector tmp1(*readMap); Epetra_Vector tmp2(map); readA->Multiply(false, *readxexact, tmp1); A.Multiply(false, xexact, tmp2); double residual; tmp1.Norm2(&residual); if (verbose) cout << "Norm of Ax from file = " << residual << endl; tmp2.Norm2(&residual); if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl; //cout << "A from file = " << *readA << endl << endl << endl; //cout << "A after dist = " << A << endl << endl << endl; delete readA; delete readx; delete readb; delete readxexact; delete readMap; Comm.Barrier(); bool smallProblem = false; if (A.RowMap().NumGlobalElements()<100) smallProblem = true; if (smallProblem) cout << "Original Matrix = " << endl << A << endl; x.PutScalar(0.0); Epetra_LinearProblem FullProblem(&A, &x, &b); double normb, norma; b.NormInf(&normb); 
norma = A.NormInf(); if (verbose) cout << "Inf norm of Original Matrix = " << norma << endl << "Inf norm of Original RHS = " << normb << endl; Epetra_Time ReductionTimer(Comm); Epetra_CrsSingletonFilter SingletonFilter; Comm.Barrier(); double reduceInitTime = ReductionTimer.ElapsedTime(); SingletonFilter.Analyze(&A); Comm.Barrier(); double reduceAnalyzeTime = ReductionTimer.ElapsedTime() - reduceInitTime; if (SingletonFilter.SingletonsDetected()) cout << "Singletons found" << endl; else { cout << "Singletons not found" << endl; exit(1); } SingletonFilter.ConstructReducedProblem(&FullProblem); Comm.Barrier(); double reduceConstructTime = ReductionTimer.ElapsedTime() - reduceInitTime; double totalReduceTime = ReductionTimer.ElapsedTime(); if (verbose) cout << "\n\n****************************************************" << endl << " Reduction init time (sec) = " << reduceInitTime<< endl << " Reduction Analyze time (sec) = " << reduceAnalyzeTime << endl << " Construct Reduced Problem time (sec) = " << reduceConstructTime << endl << " Reduction Total time (sec) = " << totalReduceTime << endl<< endl; Statistics(SingletonFilter); Epetra_LinearProblem * ReducedProblem = SingletonFilter.ReducedProblem(); Epetra_CrsMatrix * Ap = dynamic_cast<Epetra_CrsMatrix *>(ReducedProblem->GetMatrix()); Epetra_Vector * bp = (*ReducedProblem->GetRHS())(0); Epetra_Vector * xp = (*ReducedProblem->GetLHS())(0); if (smallProblem) cout << " Reduced Matrix = " << endl << *Ap << endl << " LHS before sol = " << endl << *xp << endl << " RHS = " << endl << *bp << endl; // Construct ILU preconditioner double elapsed_time, total_flops, MFLOPs; Epetra_Time timer(Comm); int LevelFill = 0; if (argc > 2) LevelFill = atoi(argv[2]); if (verbose) cout << "Using Level Fill = " << LevelFill << endl; int Overlap = 0; if (argc > 3) Overlap = atoi(argv[3]); if (verbose) cout << "Using Level Overlap = " << Overlap << endl; double Athresh = 0.0; if (argc > 4) Athresh = atof(argv[4]); if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl; double Rthresh = 1.0; if (argc > 5) Rthresh = atof(argv[5]); if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl; Ifpack_IlukGraph * IlukGraph = 0; Ifpack_CrsRiluk * ILUK = 0; if (LevelFill>-1) { elapsed_time = timer.ElapsedTime(); IlukGraph = new Ifpack_IlukGraph(Ap->Graph(), LevelFill, Overlap); assert(IlukGraph->ConstructFilledGraph()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl; Epetra_Flops fact_counter; elapsed_time = timer.ElapsedTime(); ILUK = new Ifpack_CrsRiluk(*IlukGraph); ILUK->SetFlopCounter(fact_counter); ILUK->SetAbsoluteThreshold(Athresh); ILUK->SetRelativeThreshold(Rthresh); //assert(ILUK->InitValues()==0); int initerr = ILUK->InitValues(*Ap); if (initerr!=0) { cout << endl << Comm << endl << " InitValues error = " << initerr; if (initerr==1) cout << " Zero diagonal found, warning error only"; cout << endl << endl; } assert(ILUK->Factor()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = ILUK->Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute preconditioner values = " << elapsed_time << endl << "MFLOPS for Factorization = " << MFLOPs << endl; //cout << *ILUK << endl; double Condest; ILUK->Condest(false, Condest); if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl; } int Maxiter = 100; double Tolerance = 1.0E-8; Epetra_Flops counter; 
Ap->SetFlopCounter(counter); xp->SetFlopCounter(*Ap); bp->SetFlopCounter(*Ap); if (ILUK!=0) ILUK->SetFlopCounter(*Ap); elapsed_time = timer.ElapsedTime(); double normreducedb, normreduceda; bp->NormInf(&normreducedb); normreduceda = Ap->NormInf(); if (verbose) cout << "Inf norm of Reduced Matrix = " << normreduceda << endl << "Inf norm of Reduced RHS = " << normreducedb << endl; BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute solution = " << elapsed_time << endl << "Number of operations in solve = " << total_flops << endl << "MFLOPS for Solve = " << MFLOPs<< endl << endl; SingletonFilter.ComputeFullSolution(); if (smallProblem) cout << " Reduced LHS after sol = " << endl << *xp << endl << " Full LHS after sol = " << endl << x << endl << " Full Exact LHS = " << endl << xexact << endl; Epetra_Vector resid(x); resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact resid.Norm2(&residual); double normx, normxexact; x.Norm2(&normx); xexact.Norm2(&normxexact); if (verbose) cout << "2-norm of computed solution = " << normx << endl << "2-norm of exact solution = " << normxexact << endl << "2-norm of difference between computed and exact solution = " << residual << endl; if (verbose1 && residual>1.0e-5) { if (verbose) cout << "Difference between computed and exact solution appears large..." << endl << "Computing norm of A times this difference. If this norm is small, then matrix is singular" << endl; Epetra_Vector bdiff(b); assert(A.Multiply(false, resid, bdiff)==0); assert(bdiff.Norm2(&residual)==0); if (verbose) cout << "2-norm of A times difference between computed and exact solution = " << residual << endl; } if (verbose) cout << "********************************************************" << endl << " Solving again with 2*Ax=2*b" << endl << "********************************************************" << endl; A.Scale(2.0); // A = 2*A b.Scale(2.0); // b = 2*b x.PutScalar(0.0); b.NormInf(&normb); norma = A.NormInf(); if (verbose) cout << "Inf norm of Original Matrix = " << norma << endl << "Inf norm of Original RHS = " << normb << endl; double updateReducedProblemTime = ReductionTimer.ElapsedTime(); SingletonFilter.UpdateReducedProblem(&FullProblem); Comm.Barrier(); updateReducedProblemTime = ReductionTimer.ElapsedTime() - updateReducedProblemTime; if (verbose) cout << "\n\n****************************************************" << endl << " Update Reduced Problem time (sec) = " << updateReducedProblemTime<< endl << "****************************************************" << endl; Statistics(SingletonFilter); if (LevelFill>-1) { Epetra_Flops fact_counter; elapsed_time = timer.ElapsedTime(); int initerr = ILUK->InitValues(*Ap); if (initerr!=0) { cout << endl << Comm << endl << " InitValues error = " << initerr; if (initerr==1) cout << " Zero diagonal found, warning error only"; cout << endl << endl; } assert(ILUK->Factor()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = ILUK->Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute preconditioner values = " << elapsed_time << endl << "MFLOPS for Factorization = " << MFLOPs << endl; double Condest; ILUK->Condest(false, Condest); if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl; } bp->NormInf(&normreducedb); normreduceda = Ap->NormInf(); if (verbose) cout <<
"Inf norm of Reduced Matrix = " << normreduceda << endl << "Inf norm of Reduced RHS = " << normreducedb << endl; BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute solution = " << elapsed_time << endl << "Number of operations in solve = " << total_flops << endl << "MFLOPS for Solve = " << MFLOPs<< endl << endl; SingletonFilter.ComputeFullSolution(); if (smallProblem) cout << " Reduced LHS after sol = " << endl << *xp << endl << " Full LHS after sol = " << endl << x << endl << " Full Exact LHS = " << endl << xexact << endl; resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact resid.Norm2(&residual); x.Norm2(&normx); xexact.Norm2(&normxexact); if (verbose) cout << "2-norm of computed solution = " << normx << endl << "2-norm of exact solution = " << normxexact << endl << "2-norm of difference between computed and exact solution = " << residual << endl; if (verbose1 && residual>1.0e-5) { if (verbose) cout << "Difference between computed and exact solution appears large..." << endl << "Computing norm of A times this difference. If this norm is small, then matrix is singular" << endl; Epetra_Vector bdiff(b); assert(A.Multiply(false, resid, bdiff)==0); assert(bdiff.Norm2(&residual)==0); if (verbose) cout << "2-norm of A times difference between computed and exact solution = " << residual << endl; } if (ILUK!=0) delete ILUK; if (IlukGraph!=0) delete IlukGraph; #ifdef EPETRA_MPI MPI_Finalize() ; #endif return 0 ; }
int main (int argc, char *argv[]) { using Teuchos::ArrayRCP; using Teuchos::ArrayView; using Teuchos::Comm; using Teuchos::CommandLineProcessor; using Teuchos::FancyOStream; using Teuchos::getFancyOStream; using Teuchos::OSTab; using Teuchos::ptr; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcpFromRef; using std::cout; using std::endl; bool success = true; // May be changed by tests Teuchos::oblackholestream blackHole; //Teuchos::GlobalMPISession (&argc, &argv, &blackHole); MPI_Init (&argc, &argv); // // Construct communicators, and verify that we are on 4 processors. // // Construct a Teuchos Comm object. RCP<const Comm<int> > teuchosComm = Teuchos::DefaultComm<int>::getComm(); const int numProcs = teuchosComm->getSize(); const int pid = teuchosComm->getRank(); RCP<FancyOStream> pOut = getFancyOStream (rcpFromRef ((pid == 0) ? std::cout : blackHole)); FancyOStream& out = *pOut; // Verify that we are on four processors (which manifests the bug). if (teuchosComm->getSize() != 4) { out << "This test must be run on four processors. Exiting ..." << endl; return EXIT_FAILURE; } // We also need an Epetra Comm, so that we can compare Tpetra and // Epetra results. Epetra_MpiComm epetraComm (MPI_COMM_WORLD); // // Default values of command-line options. // bool verbose = false; bool printEpetra = false; bool printTpetra = false; CommandLineProcessor cmdp (false,true); // // Set command-line options. // cmdp.setOption ("verbose", "quiet", &verbose, "Print verbose output."); // Epetra and Tpetra output will ask the Maps and Import objects to // print themselves in distributed, maximally verbose fashion. It's // best to turn on either Epetra or Tpetra, but not both. Then you // can compare their output side by side. cmdp.setOption ("printEpetra", "dontPrintEpetra", &printEpetra, "Print Epetra output (in verbose mode only)."); cmdp.setOption ("printTpetra", "dontPrintTpetra", &printTpetra, "Print Tpetra output (in verbose mode only)."); // Parse command-line options. if (cmdp.parse (argc,argv) != CommandLineProcessor::PARSE_SUCCESSFUL) { out << "End Result: TEST FAILED" << endl; MPI_Finalize (); return EXIT_FAILURE; } if (verbose) { out << "Running test on " << numProcs << " process" << (numProcs != 1 ? "es" : "") << "." << endl; } // The maps for this problem are derived from a 3D structured mesh. // In this example, the dimensions are 4x4x2 and there are 2 // processors assigned to the first dimension and 2 processors // assigned to the second dimension, with no parallel decomposition // along the third dimension. The "owned" arrays represent the // one-to-one map, with each array representing a 2x2x2 slice. If // DIMENSIONS == 2, then only the first 4 values will be used, // representing a 2x2(x1) slice. int owned0[8] = { 0, 1, 4, 5,16,17,20,21}; int owned1[8] = { 2, 3, 6, 7,18,19,22,23}; int owned2[8] = { 8, 9,12,13,24,25,28,29}; int owned3[8] = {10,11,14,15,26,27,30,31}; // The "overlap" arrays represent the map with communication // elements, with each array representing a 3x3x2 slice. If // DIMENSIONS == 2, then only the first 9 values will be used, // representing a 3x3(x1) slice. int overlap0[18] = {0,1,2,4, 5, 6, 8, 9,10,16,17,18,20,21,22,24,25,26}; int overlap1[18] = {1,2,3,5, 6, 7, 9,10,11,17,18,19,21,22,23,25,26,27}; int overlap2[18] = {4,5,6,8, 9,10,12,13,14,20,21,22,24,25,26,28,29,30}; int overlap3[18] = {5,6,7,9,10,11,13,14,15,21,22,23,25,26,27,29,30,31}; // Construct the owned and overlap maps for both Epetra and Tpetra. 
int* owned; int* overlap; if (pid == 0) { owned = owned0; overlap = overlap0; } else if (pid == 1) { owned = owned1; overlap = overlap1; } else if (pid == 2) { owned = owned2; overlap = overlap2; } else { owned = owned3; overlap = overlap3; } #if DIMENSIONS == 2 int ownedSize = 4; int overlapSize = 9; #elif DIMENSIONS == 3 int ownedSize = 8; int overlapSize = 18; #endif // Create the two Epetra Maps. Source for the Import is the owned // map; target for the Import is the overlap map. Epetra_Map epetraOwnedMap ( -1, ownedSize, owned, 0, epetraComm); Epetra_Map epetraOverlapMap (-1, overlapSize, overlap, 0, epetraComm); if (verbose && printEpetra) { // Have the Epetra_Map objects describe themselves. // // Epetra_BlockMap::Print() takes an std::ostream&, and expects // all MPI processes to be able to write to it. (The method // handles its own synchronization.) out << "Epetra owned map:" << endl; epetraOwnedMap.Print (std::cout); out << "Epetra overlap map:" << endl; epetraOverlapMap.Print (std::cout); } // Create the two Tpetra Maps. The "invalid" global element count // input tells Tpetra::Map to compute the global number of elements // itself. const int invalid = Teuchos::OrdinalTraits<int>::invalid(); RCP<Tpetra::Map<int> > tpetraOwnedMap = rcp (new Tpetra::Map<int> (invalid, ArrayView<int> (owned, ownedSize), 0, teuchosComm)); tpetraOwnedMap->setObjectLabel ("Owned Map"); RCP<Tpetra::Map<int> > tpetraOverlapMap = rcp (new Tpetra::Map<int> (invalid, ArrayView<int> (overlap, overlapSize), 0, teuchosComm)); tpetraOverlapMap->setObjectLabel ("Overlap Map"); // In verbose mode, have the Tpetra::Map objects describe themselves. if (verbose && printTpetra) { Teuchos::EVerbosityLevel verb = Teuchos::VERB_EXTREME; // Tpetra::Map::describe() takes a FancyOStream, but expects all // MPI processes to be able to write to it. (The method handles // its own synchronization.) RCP<FancyOStream> globalOut = getFancyOStream (rcpFromRef (std::cout)); out << "Tpetra owned map:" << endl; { OSTab tab (globalOut); tpetraOwnedMap->describe (*globalOut, verb); } out << "Tpetra overlap map:" << endl; { OSTab tab (globalOut); tpetraOverlapMap->describe (*globalOut, verb); } } // Use the owned and overlap maps to construct an importer for both // Epetra and Tpetra. Epetra_Import epetraImporter (epetraOverlapMap, epetraOwnedMap ); Tpetra::Import<int> tpetraImporter (tpetraOwnedMap , tpetraOverlapMap); // In verbose mode, have the Epetra_Import object describe itself. if (verbose && printEpetra) { out << "Epetra importer:" << endl; // The importer's Print() method takes an std::ostream& and plans // to write to it on all MPI processes (handling synchronization // itself). epetraImporter.Print (std::cout); out << endl; } // In verbose mode, have the Tpetra::Import object describe itself. if (verbose && printTpetra) { out << "Tpetra importer:" << endl; // The importer doesn't implement Teuchos::Describable. It wants // std::cout and plans to write to it on all MPI processes (with // its own synchronization). tpetraImporter.print (std::cout); out << endl; } // Construct owned and overlap vectors for both Epetra and Tpetra. Epetra_Vector epetraOwnedVector (epetraOwnedMap ); Epetra_Vector epetraOverlapVector (epetraOverlapMap); Tpetra::Vector<double,int> tpetraOwnedVector (tpetraOwnedMap ); Tpetra::Vector<double,int> tpetraOverlapVector (tpetraOverlapMap); // The test is as follows: initialize the owned and overlap vectors // with global IDs in the owned regions. 
Initialize the overlap // vectors to equal -1 in the overlap regions. Then perform a // communication from the owned vectors to the overlap vectors. The // resulting overlap vectors should have global IDs everywhere and // all of the -1 values should be overwritten. // Initialize. We cannot assign directly to the Tpetra Vectors; // instead, we extract nonconst views and assign to those. The // results aren't guaranteed to be committed to the vector unless // the views are released (by assigning Teuchos::null to them). epetraOverlapVector.PutScalar(-1); tpetraOverlapVector.putScalar(-1); ArrayRCP<double> tpetraOwnedArray = tpetraOwnedVector.getDataNonConst(0); ArrayRCP<double> tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0); for (int owned_lid = 0; owned_lid < tpetraOwnedMap->getNodeElementList().size(); ++owned_lid) { int gid = tpetraOwnedMap->getGlobalElement(owned_lid); int overlap_lid = tpetraOverlapMap->getLocalElement(gid); epetraOwnedVector[owned_lid] = gid; epetraOverlapVector[overlap_lid] = gid; tpetraOwnedArray[owned_lid] = gid; tpetraOverlapArray[overlap_lid] = gid; } // Make sure that the changes to the Tpetra Vector were committed, // by releasing the nonconst views. tpetraOwnedArray = Teuchos::null; tpetraOverlapArray = Teuchos::null; // Test the Epetra and Tpetra Import. if (verbose) { out << "Testing Import from owned Map to overlap Map:" << endl << endl; } epetraOverlapVector.Import( epetraOwnedVector, epetraImporter, Insert); tpetraOverlapVector.doImport(tpetraOwnedVector, tpetraImporter, Tpetra::INSERT); // Check the Import results. success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, epetraOverlapMap, epetraOverlapVector, tpetraOwnedMap, tpetraOwnedVector, tpetraOverlapMap, tpetraOverlapVector, verbose); const bool testOtherDirections = false; if (testOtherDirections) { // // Reinitialize the Tpetra vectors and test whether Export works. // tpetraOverlapVector.putScalar(-1); tpetraOwnedArray = tpetraOwnedVector.getDataNonConst(0); tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0); for (int owned_lid = 0; owned_lid < tpetraOwnedMap->getNodeElementList().size(); ++owned_lid) { int gid = tpetraOwnedMap->getGlobalElement(owned_lid); int overlap_lid = tpetraOverlapMap->getLocalElement(gid); tpetraOwnedArray[owned_lid] = gid; tpetraOverlapArray[overlap_lid] = gid; } // Make sure that the changes to the Tpetra Vector were committed, // by releasing the nonconst views. tpetraOwnedArray = Teuchos::null; tpetraOverlapArray = Teuchos::null; // Make a Tpetra Export object, and test the export. Tpetra::Export<int> tpetraExporter1 (tpetraOwnedMap, tpetraOverlapMap); if (verbose) { out << "Testing Export from owned Map to overlap Map:" << endl << endl; } tpetraOverlapVector.doExport (tpetraOwnedVector, tpetraExporter1, Tpetra::INSERT); // Check the Export results. success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, epetraOverlapMap, epetraOverlapVector, tpetraOwnedMap, tpetraOwnedVector, tpetraOverlapMap, tpetraOverlapVector, verbose); // // Reinitialize the Tpetra vectors and see what Import in the // other direction does. 
// tpetraOverlapVector.putScalar(-1); tpetraOwnedArray = tpetraOwnedVector.getDataNonConst(0); tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0); for (int owned_lid = 0; owned_lid < tpetraOwnedMap->getNodeElementList().size(); ++owned_lid) { int gid = tpetraOwnedMap->getGlobalElement(owned_lid); int overlap_lid = tpetraOverlapMap->getLocalElement(gid); tpetraOwnedArray[owned_lid] = gid; tpetraOverlapArray[overlap_lid] = gid; } // Make sure that the changes to the Tpetra Vector were committed, // by releasing the nonconst views. tpetraOwnedArray = Teuchos::null; tpetraOverlapArray = Teuchos::null; if (verbose) { out << "Testing Import from overlap Map to owned Map:" << endl << endl; } Tpetra::Import<int> tpetraImporter2 (tpetraOverlapMap, tpetraOwnedMap); tpetraOwnedVector.doImport (tpetraOverlapVector, tpetraImporter2, Tpetra::INSERT); // Check the Import results. success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, epetraOverlapMap, epetraOverlapVector, tpetraOwnedMap, tpetraOwnedVector, tpetraOverlapMap, tpetraOverlapVector, verbose); } // if testOtherDirections out << "End Result: TEST " << (success ? "PASSED" : "FAILED") << endl; MPI_Finalize (); return success ? EXIT_SUCCESS : EXIT_FAILURE; }
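// countFailures() is defined elsewhere in this test. Purely for
// illustration, the Epetra half of such a check could look like the
// hypothetical helper below: after the Import, every entry of the overlap
// vector should equal its own global ID, so any surviving -1 marks an
// element the communication failed to fill.
static int countEpetraOverlapMismatches(const Epetra_Map& overlapMap,
                                        const Epetra_Vector& overlapVec)
{
  int localBad = 0;
  for (int lid = 0; lid < overlapMap.NumMyElements(); ++lid) {
    const int gid = overlapMap.GID(lid);
    if (overlapVec[lid] != static_cast<double>(gid))
      ++localBad;
  }
  int globalBad = 0;
  overlapMap.Comm().SumAll(&localBad, &globalBad, 1);
  return globalBad; // 0 on every process when the Import succeeded
}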
// // TestMrhsSolver.cpp reads in a matrix in Harwell-Boeing format, // calls one of the sparse direct solvers, using multiple right hand sides // (one per solve) and computes the error and residual. // // TestSolver ignores the Harwell-Boeing right hand sides, creating // random right hand sides instead. // // TestMrhsSolver can test either A x = b or A^T x = b. // This can be a bit confusing because sparse direct solvers // use compressed column storage - the transpose of Trilinos' // sparse row storage. // // Matrices: // readA - Serial. As read from the file. // transposeA - Serial. The transpose of readA. // serialA - if (transpose) then transposeA else readA // distributedA - readA distributed to all processes // passA - if ( distributed ) then distributedA else serialA // // int Amesos_TestMrhsSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, SparseSolverType SparseSolver, bool transpose, int special, AMESOS_MatrixType matrix_type ) { Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; std::string FileName = matrix_file ; int FN_Size = FileName.size() ; std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size ); std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size ); bool NonContiguousMap = false; if ( LastFiveBytes == ".triU" ) { // Call routine to read in unsymmetric Triplet matrix NonContiguousMap = true; EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFiveBytes == ".triS" ) { NonContiguousMap = true; // Call routine to read in symmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, readMap, readA, readx, readb, readxexact) ); } else { if ( LastFourBytes == ".mtx" ) { EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ); } else { // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ; } } } Epetra_CrsMatrix transposeA(Copy, *readMap, 0); Epetra_CrsMatrix *serialA ; if ( transpose ) { assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); serialA = &transposeA ; } else { serialA = readA ; } // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); Epetra_Map* map_; if( NonContiguousMap ) { // // map gives us NumMyElements and MyFirstElement; // int NumGlobalElements = readMap->NumGlobalElements(); int NumMyElements = map.NumMyElements(); int MyFirstElement = map.MinMyGID(); std::vector<int> MapMap_( NumGlobalElements ); readMap->MyGlobalElements( &MapMap_[0] ) ; Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm); } else { map_ = new Epetra_Map( map ) ; } // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, *map_); Epetra_CrsMatrix A(Copy, *map_, 0); Epetra_RowMatrix * passA = 0; Epetra_MultiVector * passx = 0; Epetra_MultiVector * passb = 0; Epetra_MultiVector * passxexact = 0; Epetra_MultiVector * passresid = 0; Epetra_MultiVector * passtmp = 0; Epetra_MultiVector x(*map_,numsolves); Epetra_MultiVector b(*map_,numsolves); Epetra_MultiVector xexact(*map_,numsolves); Epetra_MultiVector resid(*map_,numsolves); Epetra_MultiVector tmp(*map_,numsolves); Epetra_MultiVector 
serialx(*readMap,numsolves); Epetra_MultiVector serialb(*readMap,numsolves); Epetra_MultiVector serialxexact(*readMap,numsolves); Epetra_MultiVector serialresid(*readMap,numsolves); Epetra_MultiVector serialtmp(*readMap,numsolves); bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; if ( distribute_matrix ) { // // Initialize x, b and xexact to the values read in from the file // A.Export(*serialA, exporter, Add); Comm.Barrier(); assert(A.FillComplete()==0); Comm.Barrier(); passA = &A; passx = &x; passb = &b; passxexact = &xexact; passresid = &resid; passtmp = &tmp; } else { passA = serialA; passx = &serialx; passb = &serialb; passxexact = &serialxexact; passresid = &serialresid; passtmp = &serialtmp; } passxexact->SetSeed(131) ; passxexact->Random(); passx->SetSeed(11231) ; passx->Random(); passb->PutScalar( 0.0 ); passA->Multiply( transpose, *passxexact, *passb ) ; Epetra_MultiVector CopyB( *passb ) ; double Anorm = passA->NormInf() ; SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ; Epetra_LinearProblem Problem( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ); double max_resid = 0.0; for ( int j = 0 ; j < special+1 ; j++ ) { Epetra_Time TotalTime( Comm ) ; if ( false ) { #ifdef TEST_UMFPACK unused code } else if ( SparseSolver == UMFPACK ) { UmfpackOO umfpack( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; umfpack.SetTrans( transpose ) ; umfpack.Solve() ; #endif #ifdef TEST_SUPERLU } else if ( SparseSolver == SuperLU ) { SuperluserialOO superluserial ; superluserial.SetUserMatrix( (Epetra_RowMatrix *) passA) ; superluserial.SetPermc( SuperLU_permc ) ; superluserial.SetTrans( transpose ) ; superluserial.SetUseDGSSV( special == 0 ) ; for ( int i= 0 ; i < numsolves ; i++ ) { // set up to sovle A X[:,i] = B[:,i] Epetra_Vector *passb_i = (*passb)(i) ; Epetra_Vector *passx_i = (*passx)(i) ; superluserial.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ; superluserial.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) ); // superluserial.SetRHS( (Epetra_MultiVector *) passb_i ; superluserial.Solve() ; if ( i == 0 ) { SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); } else { if ( i < numsolves-1 ) SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); else SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); } } #endif #ifdef HAVE_AMESOS_SLUD } else if ( SparseSolver == SuperLUdist ) { SuperludistOO superludist( Problem ) ; superludist.SetTrans( transpose ) ; bool factor = true; for ( int i= 0 ; i < numsolves ; i++ ) { // set up to sovle A X[:,i] = B[:,i] Epetra_Vector *passb_i = (*passb)(i) ; Epetra_Vector *passx_i = (*passx)(i) ; Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ; Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) ); EPETRA_CHK_ERR( superludist.Solve( factor ) ); factor = false; if ( i == 0 ) SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); else { if ( i < numsolves-1 ) SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); else SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); } } #endif #ifdef HAVE_AMESOS_SLUD2 } else if ( SparseSolver == SuperLUdist2 ) { Superludist2_OO superludist2( Problem ) ; superludist2.SetTrans( transpose ) ; bool factor = true; for ( int i= 0 ; i < numsolves ; i++ ) { // set up to sovle A X[:,i] = B[:,i] Epetra_Vector *passb_i = (*passb)(i) ; Epetra_Vector *passx_i = 
(*passx)(i) ; Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ; Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) ); EPETRA_CHK_ERR( superludist2.Solve( factor ) ); factor = false; if ( i == 0 ) SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); else { if ( i < numsolves-1 ) SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); else SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); } } #endif #ifdef HAVE_AMESOS_DSCPACK } else if ( SparseSolver == DSCPACK ) { Teuchos::ParameterList ParamList ; Amesos_Dscpack dscpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); for ( int i= 0 ; i < numsolves ; i++ ) { // set up to sovle A X[:,i] = B[:,i] Epetra_Vector *passb_i = (*passb)(i) ; Epetra_Vector *passx_i = (*passx)(i) ; Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ; Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) ); EPETRA_CHK_ERR( dscpack.Solve( ) ); if ( i == 0 ) SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); else { if ( i < numsolves-1 ) SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); else SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); } } #endif #ifdef HAVE_AMESOS_UMFPACK } else if ( SparseSolver == UMFPACK ) { Teuchos::ParameterList ParamList ; Amesos_Umfpack umfpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); for ( int i= 0 ; i < numsolves ; i++ ) { // set up to sovle A X[:,i] = B[:,i] Epetra_Vector *passb_i = (*passb)(i) ; Epetra_Vector *passx_i = (*passx)(i) ; Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ; Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) ); EPETRA_CHK_ERR( umfpack.Solve( ) ); if ( i == 0 ) SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); else { if ( i < numsolves-1 ) SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); else SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); } } #endif #ifdef HAVE_AMESOS_SUPERLU } else if ( SparseSolver == SUPERLU ) { Teuchos::ParameterList ParamList ; Amesos_Superlu superlu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superlu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superlu.NumericFactorization( ) ); for ( int i= 0 ; i < numsolves ; i++ ) { // set up to sovle A X[:,i] = B[:,i] Epetra_Vector *passb_i = (*passb)(i) ; Epetra_Vector *passx_i = (*passx)(i) ; Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ; Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) ); EPETRA_CHK_ERR( superlu.Solve( ) ); if ( i == 0 ) SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() ); else { if ( i < numsolves-1 ) SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() ); else SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() ); } } #endif #ifdef HAVE_AMESOS_SLUS } else if ( SparseSolver == SuperLU ) { Epetra_SLU superluserial( &Problem ) ; bool factor = true; for ( int i= 0 ; i < numsolves ; i++ ) { // set up to sovle A X[:,i] = B[:,i] Epetra_Vector *passb_i = (*passb)(i) ; Epetra_Vector *passx_i = (*passx)(i) ; Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) 
                                            ) ;
      Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
      EPETRA_CHK_ERR( superluserial.Solve( true, false, factor, 2, -1, true, transpose ) );
      if ( i == 0 )
        SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
      else {
        if ( i < numsolves-1 )
          SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
        else
          SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
      }
    }
#endif
#ifdef HAVE_AMESOS_KLU
  } else if ( SparseSolver == KLU ) {

    Teuchos::ParameterList ParamList ;
    // ParamList.set("OutputLevel",2);
    Amesos_Klu klu( Problem ) ;
    // ParamList.set ("ScaleMethod", 0) ;
    ParamList.set( "MaxProcs", -3 );
    EPETRA_CHK_ERR( klu.SetParameters( ParamList ) );
    EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) );
    EPETRA_CHK_ERR( klu.SymbolicFactorization( ) );
    for ( int trials = 0 ; trials <= 1 ; trials++ ) {
      EPETRA_CHK_ERR( klu.NumericFactorization( ) );
      for ( int i = 0 ; i < numsolves ; i++ ) {
        // set up to solve A X[:,i] = B[:,i]
        Epetra_Vector *passb_i = (*passb)(i) ;
        Epetra_Vector *passx_i = (*passx)(i) ;
        Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
        Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
        EPETRA_CHK_ERR( klu.Solve( ) );
        if ( i == 0 ) {
          SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
        } else {
          if ( i < numsolves-1 )
            SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
          else
            SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
        }
      }
    }
#endif
#ifdef HAVE_AMESOS_LAPACK
  } else if ( SparseSolver == LAPACK ) {

    Teuchos::ParameterList ParamList ;
    Amesos_Lapack lapack( Problem ) ;
    EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) );
    EPETRA_CHK_ERR( lapack.SymbolicFactorization( ) );
    EPETRA_CHK_ERR( lapack.NumericFactorization( ) );
    for ( int i = 0 ; i < numsolves ; i++ ) {
      // set up to solve A X[:,i] = B[:,i]
      Epetra_Vector *passb_i = (*passb)(i) ;
      Epetra_Vector *passx_i = (*passx)(i) ;
      Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
      Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
      EPETRA_CHK_ERR( lapack.Solve( ) );
      if ( i == 0 )
        SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
      else {
        if ( i < numsolves-1 )
          SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
        else
          SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
      }
    }
#endif
#ifdef HAVE_AMESOS_TAUCS
  } else if ( SparseSolver == TAUCS ) {

    Teuchos::ParameterList ParamList ;
    Amesos_Taucs taucs( Problem ) ;
    ParamList.set( "MaxProcs", -3 );
    EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) );
    EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) );
    EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) );
    EPETRA_CHK_ERR( taucs.NumericFactorization( ) );
    for ( int i = 0 ; i < numsolves ; i++ ) {
      // set up to solve A X[:,i] = B[:,i]
      Epetra_Vector *passb_i = (*passb)(i) ;
      Epetra_Vector *passx_i = (*passx)(i) ;
      Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
      Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
      EPETRA_CHK_ERR( taucs.Solve( ) );
      if ( i == 0 )
        SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
      else {
        if ( i < numsolves-1 )
          SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
        else
          SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
      }
    }
#endif
#ifdef HAVE_AMESOS_PARDISO
  } else if ( SparseSolver == PARDISO ) {

    Teuchos::ParameterList ParamList ;
    Amesos_Pardiso pardiso( Problem ) ;
    ParamList.set( "MaxProcs", -3 );
    EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) );
    EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) );
    EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) );
    EPETRA_CHK_ERR( pardiso.NumericFactorization( ) );
    for ( int i = 0 ; i < numsolves ; i++ ) {
      // set up to solve A X[:,i] = B[:,i]
      Epetra_Vector *passb_i = (*passb)(i) ;
      Epetra_Vector *passx_i = (*passx)(i) ;
      Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
      Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
      EPETRA_CHK_ERR( pardiso.Solve( ) );
      if ( i == 0 )
        SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
      else {
        if ( i < numsolves-1 )
          SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
        else
          SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
      }
    }
#endif
#ifdef HAVE_AMESOS_PARAKLETE
  } else if ( SparseSolver == PARAKLETE ) {

    Teuchos::ParameterList ParamList ;
    Amesos_Paraklete paraklete( Problem ) ;
    ParamList.set( "MaxProcs", -3 );
    EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) );
    EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) );
    EPETRA_CHK_ERR( paraklete.SymbolicFactorization( ) );
    EPETRA_CHK_ERR( paraklete.NumericFactorization( ) );
    for ( int i = 0 ; i < numsolves ; i++ ) {
      // set up to solve A X[:,i] = B[:,i]
      Epetra_Vector *passb_i = (*passb)(i) ;
      Epetra_Vector *passx_i = (*passx)(i) ;
      Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
      Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
      EPETRA_CHK_ERR( paraklete.Solve( ) );
      if ( i == 0 )
        SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
      else {
        if ( i < numsolves-1 )
          SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
        else
          SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
      }
    }
#endif
#if defined(HAVE_AMESOS_MUMPS) && defined(HAVE_MPI)
  } else if ( SparseSolver == MUMPS ) {

    Teuchos::ParameterList ParamList ;
    Amesos_Mumps mumps( Problem ) ;
    ParamList.set( "MaxProcs", -3 );
    EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) );
    EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) );
    EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) );
    EPETRA_CHK_ERR( mumps.NumericFactorization( ) );
    for ( int i = 0 ; i < numsolves ; i++ ) {
      // set up to solve A X[:,i] = B[:,i]
      Epetra_Vector *passb_i = (*passb)(i) ;
      Epetra_Vector *passx_i = (*passx)(i) ;
      Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
      Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
      EPETRA_CHK_ERR( mumps.Solve( ) );
      if ( i == 0 )
        SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
      else {
        if ( i < numsolves-1 )
          SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
        else
          SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
      }
    }
#endif
#ifdef HAVE_AMESOS_SCALAPACK
  } else if ( SparseSolver == SCALAPACK ) {

    Teuchos::ParameterList ParamList ;
    Amesos_Scalapack scalapack( Problem ) ;
    ParamList.set( "MaxProcs", -3 );
    EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) );
    EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) );
    EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) );
    EPETRA_CHK_ERR( scalapack.NumericFactorization( ) );
    for ( int i = 0 ; i < numsolves ; i++ ) {
      // set up to solve A X[:,i] = B[:,i]
      Epetra_Vector *passb_i = (*passb)(i) ;
      Epetra_Vector *passx_i = (*passx)(i) ;
      Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
      Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
      EPETRA_CHK_ERR( scalapack.Solve( ) );
      if ( i == 0 )
        SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
      else {
        if ( i < numsolves-1 )
          SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
        else
          SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
      }
    }
#endif
#ifdef HAVE_AMESOS_SUPERLUDIST
  } else if ( SparseSolver == SUPERLUDIST ) {

    Teuchos::ParameterList ParamList ;
    ParamList.set( "MaxProcs", -3 );
    Amesos_Superludist superludist( Problem ) ;
    EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) );
    EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) );
    EPETRA_CHK_ERR( superludist.SymbolicFactorization( ) );
    EPETRA_CHK_ERR( superludist.NumericFactorization( ) );
    SparseDirectTimingVars::SS_Result.Set_First_Time( TotalTime.ElapsedTime() );
    for ( int i = 0 ; i < numsolves ; i++ ) {
      // set up to solve A X[:,i] = B[:,i]
      Epetra_Vector *passb_i = (*passb)(i) ;
      Epetra_Vector *passx_i = (*passx)(i) ;
      Problem.SetLHS( dynamic_cast<Epetra_MultiVector *>(passx_i) ) ;
      Problem.SetRHS( dynamic_cast<Epetra_MultiVector *>(passb_i) );
      EPETRA_CHK_ERR( superludist.Solve( ) );
      if ( i < numsolves-1 )
        SparseDirectTimingVars::SS_Result.Set_Middle_Time( TotalTime.ElapsedTime() );
      else
        SparseDirectTimingVars::SS_Result.Set_Last_Time( TotalTime.ElapsedTime() );
    }
#endif
#ifdef TEST_SPOOLES
  } else if ( SparseSolver == SPOOLES ) {
    SpoolesOO spooles( (Epetra_RowMatrix *) passA,
                       (Epetra_MultiVector *) passx,
                       (Epetra_MultiVector *) passb ) ;
    spooles.SetTrans( transpose ) ;
    spooles.Solve() ;
#endif
#ifdef TEST_SPOOLESSERIAL
  } else if ( SparseSolver == SPOOLESSERIAL ) {
    SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA,
                                   (Epetra_MultiVector *) passx,
                                   (Epetra_MultiVector *) passb ) ;
    spoolesserial.Solve() ;
#endif
  } else {
    SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
    std::cerr << "\n\n####################  Requested solver not available (Or not tested with multiple RHS) on this platform #####################\n" << std::endl ;
  }

  SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() );

  //
  //  Compute the error = norm(xcomp - xexact )
  //
  std::vector<double> error(numsolves) ;
  double max_error = 0.0;

  passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

  passresid->Norm2(&error[0]);
  for ( int i = 0 ; i < numsolves ; i++ )
    if ( error[i] > max_error ) max_error = error[i] ;
  SparseDirectTimingVars::SS_Result.Set_Error(max_error) ;

  //  passxexact->Norm2(&error[0] ) ;
  //  passx->Norm2(&error ) ;

  //
  //  Compute the residual = norm(Ax - b)
  //
  std::vector<double> residual(numsolves) ;

  passtmp->PutScalar(0.0);
  passA->Multiply( transpose, *passx, *passtmp);
  passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0);
  //  passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0);
  passresid->Norm2(&residual[0]);

  for ( int i = 0 ; i < numsolves ; i++ )
    if ( residual[i] > max_resid ) max_resid = residual[i] ;

  SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ;

  std::vector<double> bnorm(numsolves);
  passb->Norm2( &bnorm[0] ) ;
  SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ;

  std::vector<double> xnorm(numsolves);
  passx->Norm2( &xnorm[0] ) ;
  SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ;

  }

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;

  Comm.Barrier();

  return 0;
}
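//
//  A minimal sketch of the Amesos call sequence that every solver branch
//  above follows: set parameters, choose transpose, factor symbolically,
//  factor numerically, then solve.  SolveWithKlu is a hypothetical helper
//  (not used by the test driver); it assumes a filled Epetra_CrsMatrix A
//  and vectors x, b built on the same map.
//
#ifdef HAVE_AMESOS_KLU
static int SolveWithKlu( Epetra_CrsMatrix& A, Epetra_Vector& x, Epetra_Vector& b,
                         bool transpose )
{
  Epetra_LinearProblem Problem( &A, &x, &b );
  Amesos_Klu klu( Problem );
  Teuchos::ParameterList ParamList;
  ParamList.set( "MaxProcs", -3 );                 // same setting the tests use
  EPETRA_CHK_ERR( klu.SetParameters( ParamList ) );
  EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) );
  EPETRA_CHK_ERR( klu.SymbolicFactorization( ) );  // analyze the nonzero pattern
  EPETRA_CHK_ERR( klu.NumericFactorization( ) );   // factor the current values
  EPETRA_CHK_ERR( klu.Solve( ) );                  // x now holds the solution
  return 0;
}
#endif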
int main(int argc, char *argv[]) {

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int MyPID = Comm.MyPID();

  // Harwell-Boeing matrix file (originally downloaded from MatrixMarket)
  char FileName[] = "../HBMatrices/fidap005.rua";

  Epetra_Map * readMap; // Pointers because of Trilinos_Util_ReadHb2Epetra
  Epetra_CrsMatrix * readA;
  Epetra_Vector * readx;
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;

  // Call routine to read in HB problem
  Trilinos_Util_ReadHb2Epetra(FileName, Comm, readMap, readA, readx, readb, readxexact);

  int NumGlobalElements = readMap->NumGlobalElements();

  // Create uniform distributed map
  Epetra_Map map(NumGlobalElements, 0, Comm);

  // Create Exporter to distribute read-in matrix and vectors
  Epetra_Export exporter(*readMap, map);

  Epetra_CrsMatrix A(Copy, map, 0);
  Epetra_Vector x(map);
  Epetra_Vector b(map);
  Epetra_Vector xexact(map);

  Epetra_Time FillTimer(Comm);
  x.Export(*readx, exporter, Add);
  b.Export(*readb, exporter, Add);
  xexact.Export(*readxexact, exporter, Add);
  Comm.Barrier();
  double vectorRedistributeTime = FillTimer.ElapsedTime();
  A.Export(*readA, exporter, Add);
  Comm.Barrier();
  double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime;
  A.FillComplete();
  Comm.Barrier();
  // FillComplete time is what remains after both redistribution phases
  double fillCompleteTime = FillTimer.ElapsedTime()
                          - vectorRedistributeTime - matrixRedistributeTime;

  if( MyPID==0 )  {
    cout << "Vector redistribute time (sec) = " << vectorRedistributeTime << endl;
    cout << "Matrix redistribute time (sec) = " << matrixRedistributeTime << endl;
    cout << "Transform to Local time (sec) = " << fillCompleteTime << endl << endl;
  }

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;

#ifdef HAVE_MPI
  MPI_Finalize() ;
#endif

  return(EXIT_SUCCESS);
}
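//
//  A minimal sketch of the read-then-redistribute pattern used in main()
//  above.  RedistributeUniform is a hypothetical helper: it assumes readVec
//  was filled on the read-in (root-heavy) map and moves it onto a uniform
//  linear map over the same global IDs; the caller owns the returned objects.
//
static void RedistributeUniform( const Epetra_Vector& readVec,
                                 const Epetra_Comm& Comm,
                                 Epetra_Map*& outMap, Epetra_Vector*& outVec )
{
  int NumGlobalElements = readVec.Map().NumGlobalElements();
  outMap = new Epetra_Map( NumGlobalElements, 0, Comm ); // uniform distribution
  Epetra_Export exporter( readVec.Map(), *outMap );
  outVec = new Epetra_Vector( *outMap );
  outVec->Export( readVec, exporter, Add );              // ship entries to new owners
}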
//
//  Diagonal:  0=no change, 1=eliminate entry
//             from the map for the largest row element in process 0
//             2=add diagonal entries to the matrix, with a zero value
//             (assume row map contains all diagonal entries).
//
//  ReindexRowMap:
//    0=no change, 1=add 2 (still contiguous), 2=non-contiguous
//
//  ReindexColMap:
//    0=same as RowMap, 1=add 4 (different from RowMap, but contiguous)
//
//  RangeMap:
//    0=no change, 1=serial map, 2=bizarre distribution, 3=replicated map
//
//  DomainMap:
//    0=no change, 1=serial map, 2=bizarre distribution, 3=replicated map
//
RCP<Epetra_CrsMatrix> NewMatNewMap(Epetra_CrsMatrix& In,
                                   int Diagonal,
                                   int ReindexRowMap,
                                   int ReindexColMap,
                                   int RangeMapType,
                                   int DomainMapType
                                   )
{
  //
  //  If we are making no change, return the original matrix (which has a linear map)
  //
#if 0
  std::cout << __FILE__ << "::" << __LINE__ << " "
            << Diagonal << " "
            << ReindexRowMap << " "
            << ReindexColMap << " "
            << RangeMapType << " "
            << DomainMapType << " " << std::endl ;
#endif
  if ( Diagonal + ReindexRowMap + ReindexColMap + RangeMapType + DomainMapType == 0 ) {
    RCP<Epetra_CrsMatrix> ReturnOrig = rcp( &In, false );
    return ReturnOrig ;
  }

  //
  //  Diagonal==2 is used for a different purpose -
  //  Making sure that the diagonal of the matrix is non-empty.
  //  Note:  The diagonal must exist in In.RowMap().
  //
  if ( Diagonal == 2 ) {
    assert( ReindexRowMap==0 && ReindexColMap == 0 ) ;
  }

  int (*RowPermute)(int in) = 0;
  int (*ColPermute)(int in) = 0;

  assert( Diagonal >= 0 && Diagonal <= 2 );
  assert( ReindexRowMap>=0 && ReindexRowMap<=2 );
  assert( ReindexColMap>=0 && ReindexColMap<=1 );
  assert( RangeMapType>=0 && RangeMapType<=3 );
  assert( DomainMapType>=0 && DomainMapType<=3 );

  Epetra_Map DomainMap = In.DomainMap();
  Epetra_Map RangeMap = In.RangeMap();
  Epetra_Map ColMap = In.ColMap();
  Epetra_Map RowMap = In.RowMap();
  int NumMyRowElements = RowMap.NumMyElements();
  int NumMyColElements = ColMap.NumMyElements();
  int NumMyRangeElements = RangeMap.NumMyElements();
  int NumMyDomainElements = DomainMap.NumMyElements();

  int NumGlobalRowElements = RowMap.NumGlobalElements();
  int NumGlobalColElements = ColMap.NumGlobalElements();
  int NumGlobalRangeElements = RangeMap.NumGlobalElements();
  int NumGlobalDomainElements = DomainMap.NumGlobalElements();
  assert( NumGlobalRangeElements == NumGlobalDomainElements ) ;

  std::vector<int> MyGlobalRowElements( NumMyRowElements ) ;
  std::vector<int> NumEntriesPerRow( NumMyRowElements ) ;
  std::vector<int> MyPermutedGlobalRowElements( NumMyRowElements ) ;
  std::vector<int> MyGlobalColElements( NumMyColElements ) ;
  std::vector<int> MyPermutedGlobalColElements( NumMyColElements ) ;    // Used to create the column map
  std::vector<int> MyPermutedGlobalColElementTable( NumMyColElements ) ; // To convert local indices to global
  std::vector<int> MyGlobalRangeElements( NumMyRangeElements ) ;
  std::vector<int> MyPermutedGlobalRangeElements( NumMyRangeElements ) ;
  std::vector<int> MyGlobalDomainElements( NumMyDomainElements ) ;
  std::vector<int> MyPermutedGlobalDomainElements( NumMyDomainElements ) ;
  RowMap.MyGlobalElements(&MyGlobalRowElements[0]);
  ColMap.MyGlobalElements(&MyGlobalColElements[0]);
  RangeMap.MyGlobalElements(&MyGlobalRangeElements[0]);
  DomainMap.MyGlobalElements(&MyGlobalDomainElements[0]);

  switch( ReindexRowMap ) {
  case 0: RowPermute = &NoPermute ; break;
  case 1: RowPermute = &SmallRowPermute ; break;
  case 2: RowPermute = &BigRowPermute ; break;
  }
  switch( ReindexColMap ) {
  case 0: ColPermute = RowPermute ; break;
  case 1: ColPermute = &SmallColPermute ; break;
  }

  //
  //  Create Serial Range and Domain Maps based on the permuted indexing
  //
  int nlocal = 0;
  if (In.Comm().MyPID()==0) nlocal = NumGlobalRangeElements;
  std::vector<int> AllIDs( NumGlobalRangeElements ) ;
  for ( int i = 0; i < NumGlobalRangeElements ; i++ ) AllIDs[i] = (*RowPermute)( i ) ;
  Epetra_Map SerialRangeMap( -1, nlocal, &AllIDs[0], 0, In.Comm());
  std::vector<int> AllIDBs( NumGlobalRangeElements ) ;
  for ( int i = 0; i < NumGlobalRangeElements ; i++ ) AllIDBs[i] = (*ColPermute)( i ) ;
  Epetra_Map SerialDomainMap( -1, nlocal, &AllIDBs[0], 0, In.Comm());

  //
  //  Create Bizarre Range and Domain Maps based on the permuted indexing.
  //  These are nearly serial, having all but one element on process 0.
  //  The goal here is to make sure that we can use Domain and Range maps
  //  that are neither serial, nor distributed in the normal manner.
  //
  std::vector<int> AllIDCs( NumGlobalRangeElements ) ;
  for ( int i = 0; i < NumGlobalRangeElements ; i++ ) AllIDCs[i] = (*ColPermute)( i ) ;
  if ( In.Comm().NumProc() > 1 ) {
    if (In.Comm().MyPID()==0) nlocal = NumGlobalRangeElements-1;
    if (In.Comm().MyPID()==1) {
      nlocal = 1;
      AllIDCs[0] = (*ColPermute)( NumGlobalRangeElements - 1 );
    }
  }
  int iam = In.Comm().MyPID();
  Epetra_Map BizarreDomainMap( -1, nlocal, &AllIDCs[0], 0, In.Comm());

  std::vector<int> AllIDDs( NumGlobalRangeElements ) ;
  for ( int i = 0; i < NumGlobalRangeElements ; i++ ) AllIDDs[i] = (*RowPermute)( i ) ;
  if ( In.Comm().NumProc() > 1 ) {
    if (In.Comm().MyPID()==0) nlocal = NumGlobalRangeElements-1;
    if (In.Comm().MyPID()==1) {
      nlocal = 1;
      AllIDDs[0] = (*RowPermute)( NumGlobalRangeElements -1 ) ;
    }
  }
  Epetra_Map BizarreRangeMap( -1, nlocal, &AllIDDs[0], 0, In.Comm());

  //
  //  Compute the column map
  //
  //  If Diagonal==1, remove the column corresponding to the last row owned
  //  by process 0.  Removing this column from a tridiagonal matrix leaves
  //  a disconnected, but non-singular, matrix.
  //
  int NumMyColElementsOut = 0 ;
  int NumGlobalColElementsOut ;
  if ( Diagonal == 1 )
    NumGlobalColElementsOut = NumGlobalColElements-1;
  else
    NumGlobalColElementsOut = NumGlobalColElements;
  if ( Diagonal == 1 && iam==0 ) {
    for ( int i=0; i < NumMyColElements ; i++ ) {
      if ( MyGlobalColElements[i] != MyGlobalRowElements[NumMyRowElements-1] ) {
        MyPermutedGlobalColElements[NumMyColElementsOut++] =
          (*ColPermute)( MyGlobalColElements[i] ) ;
      }
    }
    assert( NumMyColElementsOut == NumMyColElements-1 );
  } else {
    for ( int i=0; i < NumMyColElements ; i++ )
      MyPermutedGlobalColElements[i] = (*ColPermute)( MyGlobalColElements[i] ) ;
    NumMyColElementsOut = NumMyColElements ;
    if ( Diagonal == 2 ) {
      //  For each row, make sure that the column map has this row in it;
      //  if it doesn't, add it to the column map.
      //  Note:  MyPermutedGlobalColElements == MyGlobalColElements when
      //  Diagonal==2 because ( Diagonal == 2 ) implies:
      //  ReindexRowMap==0 && ReindexColMap == 0 - see assert above
      for ( int i=0; i < NumMyRowElements ; i++ ) {
        bool MissingDiagonal = true;
        for ( int j=0; j < NumMyColElements; j++ ) {
          if ( MyGlobalRowElements[i] == MyGlobalColElements[j] ) {
            MissingDiagonal = false;
          }
        }
        if ( MissingDiagonal ) {
          MyPermutedGlobalColElements.resize(NumMyColElements+1);
          MyPermutedGlobalColElements[NumMyColElementsOut] = MyGlobalRowElements[i];
          NumMyColElementsOut++;
        }
      }
      In.Comm().SumAll(&NumMyColElementsOut,&NumGlobalColElementsOut,1);
    }
  }

  //
  //  These tables are used both as the permutation tables and to create the maps.
  //
  for ( int i=0; i < NumMyColElements ; i++ )
    MyPermutedGlobalColElementTable[i] = (*ColPermute)( MyGlobalColElements[i] ) ;
  for ( int i=0; i < NumMyRowElements ; i++ )
    MyPermutedGlobalRowElements[i] = (*RowPermute)( MyGlobalRowElements[i] ) ;
  for ( int i=0; i < NumMyRangeElements ; i++ )
    MyPermutedGlobalRangeElements[i] = (*RowPermute)( MyGlobalRangeElements[i] ) ;
  for ( int i=0; i < NumMyDomainElements ; i++ )
    MyPermutedGlobalDomainElements[i] = (*ColPermute)( MyGlobalDomainElements[i] ) ;

  RCP<Epetra_Map> PermutedRowMap =
    rcp( new Epetra_Map( NumGlobalRowElements, NumMyRowElements,
                         &MyPermutedGlobalRowElements[0], 0, In.Comm() ) );
  RCP<Epetra_Map> PermutedColMap =
    rcp( new Epetra_Map( NumGlobalColElementsOut, NumMyColElementsOut,
                         &MyPermutedGlobalColElements[0], 0, In.Comm() ) );
  RCP<Epetra_Map> PermutedRangeMap =
    rcp( new Epetra_Map( NumGlobalRangeElements, NumMyRangeElements,
                         &MyPermutedGlobalRangeElements[0], 0, In.Comm() ) );
  RCP<Epetra_Map> PermutedDomainMap =
    rcp( new Epetra_Map( NumGlobalDomainElements, NumMyDomainElements,
                         &MyPermutedGlobalDomainElements[0], 0, In.Comm() ) );

  //
  //  These vectors are filled and then passed to InsertGlobalValues
  //
  std::vector<int> ThisRowIndices( In.MaxNumEntries() );
  std::vector<double> ThisRowValues( In.MaxNumEntries() );
  std::vector<int> PermutedGlobalColIndices( In.MaxNumEntries() );

  //std::cout << __FILE__ << "::" <<__LINE__ << std::endl ;
  RCP<Epetra_CrsMatrix> Out =
    rcp( new Epetra_CrsMatrix( Copy, *PermutedRowMap, *PermutedColMap, 0 ) );

  for (int i=0; i<NumMyRowElements; i++) {
    int NumIndicesThisRow = 0;
    assert( In.ExtractMyRowCopy( i,
                                 In.MaxNumEntries(),
                                 NumIndicesThisRow,
                                 &ThisRowValues[0],
                                 &ThisRowIndices[0] ) == 0 ) ;
    for (int j = 0 ; j < NumIndicesThisRow ; j++ ) {
      PermutedGlobalColIndices[j] =
        MyPermutedGlobalColElementTable[ ThisRowIndices[j] ] ;
    }
    bool MissingDiagonal = false;
    if ( Diagonal==2 ) {
      //  assert( MyGlobalRowElements[i] == MyPermutedGlobalRowElements[i] );
      MissingDiagonal = true;
      for( int j =0 ; j < NumIndicesThisRow ; j++ ) {
        if ( PermutedGlobalColIndices[j] == MyPermutedGlobalRowElements[i] ) {
          MissingDiagonal = false ;
        }
      }
#if 0
      std::cout << __FILE__ << "::" << __LINE__
                << " i = " << i
                << " MyPermutedGlobalRowElements[i] = " << MyPermutedGlobalRowElements[i]
                << " MissingDiagonal = " << MissingDiagonal << std::endl ;
#endif
    }
    if ( MissingDiagonal ) {
      ThisRowValues.resize(NumIndicesThisRow+1) ;
      ThisRowValues[NumIndicesThisRow] = 0.0;
      PermutedGlobalColIndices.resize(NumIndicesThisRow+1);
      PermutedGlobalColIndices[NumIndicesThisRow] = MyPermutedGlobalRowElements[i] ;
#if 0
      std::cout << __FILE__ << "::" << __LINE__
                << " i = " << i
                << " NumIndicesThisRow = " << NumIndicesThisRow
                << " ThisRowValues[NumIndicesThisRow] = " << ThisRowValues[NumIndicesThisRow]
                << " PermutedGlobalColIndices[NumIndicesThisRow] = " << PermutedGlobalColIndices[NumIndicesThisRow]
                << std::endl ;
#endif
      NumIndicesThisRow++ ;
    }
    assert( Out->InsertGlobalValues( MyPermutedGlobalRowElements[i],
                                     NumIndicesThisRow,
                                     &ThisRowValues[0],
                                     &PermutedGlobalColIndices[0] ) >= 0 );
  }

  //
  Epetra_LocalMap ReplicatedMap( NumGlobalRangeElements, 0, In.Comm() );
  RCP<Epetra_Map> OutRangeMap ;
  RCP<Epetra_Map> OutDomainMap ;

  switch( RangeMapType ) {
  case 0: OutRangeMap = PermutedRangeMap ; break;
  case 1: OutRangeMap = rcp(&SerialRangeMap, false); break;
  case 2: OutRangeMap = rcp(&BizarreRangeMap, false); break;
  case 3: OutRangeMap = rcp(&ReplicatedMap, false); break;
  }
  //  switch( DomainMapType ) {
  switch( DomainMapType ) {
  case 0: OutDomainMap = PermutedDomainMap ; break;
  case 1: OutDomainMap = rcp(&SerialDomainMap, false); break;
  case 2: OutDomainMap = rcp(&BizarreDomainMap, false); break;
  case 3: OutDomainMap = rcp(&ReplicatedMap, false); break;
  }

#if 0
  assert(Out->FillComplete( *PermutedDomainMap, *PermutedRangeMap )==0);
#else
  assert(Out->FillComplete( *OutDomainMap, *OutRangeMap )==0);
#endif

#if 0
  std::cout << __FILE__ << "::" << __LINE__ << std::endl ;
  Out->Print( std::cout ) ;
#endif

  return Out;
}
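//
//  A minimal usage sketch for NewMatNewMap (hypothetical call site, not part
//  of the test harness): Diagonal==2 with all other options zero only adds
//  explicit zero-valued diagonal entries, per the assert near the top of the
//  function.
//
static RCP<Epetra_CrsMatrix> AddZeroDiagonal( Epetra_CrsMatrix& In )
{
  return NewMatNewMap( In,
                       2,    // Diagonal: add zero-valued diagonal entries
                       0,    // ReindexRowMap: no change
                       0,    // ReindexColMap: same as RowMap
                       0,    // RangeMapType: no change
                       0 );  // DomainMapType: no change
}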
int main (int argc, char *argv[]) {
  using namespace Anasazi;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using std::endl;

#ifdef HAVE_MPI
  // Initialize MPI
  MPI_Init (&argc, &argv);
#endif // HAVE_MPI

  // Create an Epetra communicator
#ifdef HAVE_MPI
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif // HAVE_MPI

  // Create an Anasazi output manager
  BasicOutputManager<double> printer;
  printer.stream(Errors) << Anasazi_Version() << std::endl << std::endl;

  // Get the sorting std::string from the command line
  std::string which ("LM");
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption("sort", &which, "Targeted eigenvalues (SM or LM).");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Dimension of the matrix
  //
  // Discretization points in any one direction.
  const int nx = 10;
  // Size of matrix nx*nx
  const int NumGlobalElements = nx*nx;

  // Construct a Map that puts approximately the same number of
  // equations on each process.
  Epetra_Map Map (NumGlobalElements, 0, Comm);

  // Get update list and number of local equations from newly created Map.
  int NumMyElements = Map.NumMyElements ();

  std::vector<int> MyGlobalElements (NumMyElements);
  Map.MyGlobalElements (&MyGlobalElements[0]);

  // Create an integer vector NumNz that is used to build the Petra
  // matrix.  NumNz[i] is the number of OFF-DIAGONAL terms for the
  // i-th global equation on this process.
  std::vector<int> NumNz (NumMyElements);

  /* We are building a matrix of block structure:

      | T -I          |
      |-I  T -I       |
      |   -I  T       |
      |        ...  -I|
      |           -I T|

     where each block is dimension nx by nx and the matrix is on the order of
     nx*nx.  The block T is a tridiagonal matrix.
  */
  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 ||
        MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) {
      NumNz[i] = 3;
    }
    else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) ||
             MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) {
      NumNz[i] = 4;
    }
    else {
      NumNz[i] = 5;
    }
  }

  // Create an Epetra_CrsMatrix
  RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Copy, Map, &NumNz[0]));

  // Compute coefficients for discrete convection-diffusion operator
  const double one = 1.0;
  std::vector<double> Values(4);
  std::vector<int> Indices(4);
  double rho = 0.0;
  double h = one /(nx+1);
  double h2 = h*h;
  double c = 5.0e-01*rho/ h;
  Values[0] = -one/h2 - c;
  Values[1] = -one/h2 + c;
  Values[2] = -one/h2;
  Values[3] = -one/h2;
  double diag = 4.0 / h2;
  int NumEntries;

  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i]==0) {
      Indices[0] = 1;
      Indices[1] = nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == nx*(nx-1)) {
      Indices[0] = nx*(nx-1)+1;
      Indices[1] = nx*(nx-2);
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == nx-1) {
      Indices[0] = nx-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = 2*nx-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == NumGlobalElements-1) {
      Indices[0] = NumGlobalElements-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = nx*(nx-1)-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] < nx) {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] > nx*(nx-1)) {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i]%nx == 0) {
      Indices[0] = MyGlobalElements[i]+1;
      Indices[1] = MyGlobalElements[i]-nx;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if ((MyGlobalElements[i]+1)%nx == 0) {
      Indices[0] = MyGlobalElements[i]-nx;
      Indices[1] = MyGlobalElements[i]+nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
      Indices[0] = MyGlobalElements[i]-1;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      Indices[3] = MyGlobalElements[i]+nx;
      NumEntries = 4;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    // Put in the diagonal entry
    int info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]);
    assert( info==0 );
  }

  // Finish up
  int info = A->FillComplete ();
  assert( info==0 );
  A->SetTracebackMode (1); // Shut down Epetra warning tracebacks

  // Create an identity matrix for the temporary mass matrix
  RCP<Epetra_CrsMatrix> M = rcp (new Epetra_CrsMatrix (Copy, Map, 1));
  for (int i=0; i<NumMyElements; i++) {
    Values[0] = one;
    Indices[0] = MyGlobalElements[i]; // global column index of the diagonal entry
    NumEntries = 1;
    info = M->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
    assert( info==0 );
  }
  // Finish up
  info = M->FillComplete ();
  assert( info==0 );
  M->SetTracebackMode (1); // Shut down Epetra warning tracebacks

  //************************************
  // Call the LOBPCG solver manager
  //***********************************
  //
  // Variables used for the LOBPCG Method
  const int nev = 10;
  const int blockSize = 5;
  const int maxIters = 500;
  const double tol = 1.0e-8;

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef MultiVecTraits<double, Epetra_MultiVector> MVT;

  // Create an Epetra_MultiVector for an initial vector to start the
  // solver.  Note:  This needs to have the same number of columns as
  // the blocksize.
  RCP<Epetra_MultiVector> ivec = rcp (new Epetra_MultiVector (Map, blockSize));
  ivec->Random (); // fill the initial vector with random values

  // Create the eigenproblem.
  RCP<BasicEigenproblem<double, MV, OP> > MyProblem =
    rcp (new BasicEigenproblem<double, MV, OP> (A, ivec));

  // Inform the eigenproblem that the operator A is symmetric
  MyProblem->setHermitian (true);

  // Set the number of eigenvalues requested
  MyProblem->setNEV (nev);

  // Tell the eigenproblem that you are finished passing it information.
  const bool success = MyProblem->setProblem ();
  if (! success) {
    printer.print (Errors, "Anasazi::BasicEigenproblem::setProblem() reported an error.\n");
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList MyPL;
  MyPL.set ("Which", which);
  MyPL.set ("Block Size", blockSize);
  MyPL.set ("Maximum Iterations", maxIters);
  MyPL.set ("Convergence Tolerance", tol);
  MyPL.set ("Full Ortho", true);
  MyPL.set ("Use Locking", true);

  // Create the solver manager
  LOBPCGSolMgr<double, MV, OP> MySolverMan (MyProblem, MyPL);

  // Solve the problem
  ReturnType returnCode = MySolverMan.solve ();

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Eigensolution<double,MV> sol = MyProblem->getSolution ();
  std::vector<Value<double> > evals = sol.Evals;
  RCP<MV> evecs = sol.Evecs;

  // Compute residuals.
  std::vector<double> normR (sol.numVecs);
  if (sol.numVecs > 0) {
    Teuchos::SerialDenseMatrix<int,double> T (sol.numVecs, sol.numVecs);
    Epetra_MultiVector tempAevec (Map, sol.numVecs );
    T.putScalar (0.0);
    for (int i = 0; i < sol.numVecs; ++i) {
      T(i,i) = evals[i].realpart;
    }
    A->Apply (*evecs, tempAevec);
    MVT::MvTimesMatAddMv (-1.0, *evecs, T, 1.0, tempAevec);
    MVT::MvNorm (tempAevec, normR);
  }

  // Print the results
  std::ostringstream os;
  os.setf (std::ios_base::right, std::ios_base::adjustfield);
  os << "Solver manager returned "
     << (returnCode == Converged ? "converged." : "unconverged.") << endl;
  os << endl;
  os << "------------------------------------------------------" << endl;
  os << std::setw(16) << "Eigenvalue"
     << std::setw(18) << "Direct Residual"
     << endl;
  os << "------------------------------------------------------" << endl;
  for (int i = 0; i < sol.numVecs; ++i) {
    os << std::setw(16) << evals[i].realpart
       << std::setw(18) << normR[i] / evals[i].realpart
       << endl;
  }
  os << "------------------------------------------------------" << endl;
  printer.print (Errors, os.str ());

#ifdef HAVE_MPI
  MPI_Finalize ();
#endif // HAVE_MPI
  return 0;
}
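//
//  A minimal standalone sketch of the direct residual check performed above,
//  factored into a hypothetical helper: r_i = A v_i - lambda_i v_i, reported
//  as ||r_i||.  Assumes evals holds the Ritz values for the columns of evecs.
//
static void ComputeDirectResiduals( const Epetra_Operator& A,
                                    const Epetra_MultiVector& evecs,
                                    const std::vector<Anasazi::Value<double> >& evals,
                                    std::vector<double>& normR )
{
  typedef Anasazi::MultiVecTraits<double, Epetra_MultiVector> MVT;
  const int numVecs = evecs.NumVectors();
  Teuchos::SerialDenseMatrix<int,double> T( numVecs, numVecs ); // zero-initialized
  for (int i = 0; i < numVecs; ++i) T(i,i) = evals[i].realpart;
  Epetra_MultiVector Aevec( evecs.Map(), numVecs );
  A.Apply( evecs, Aevec );                            // Aevec = A * evecs
  MVT::MvTimesMatAddMv( -1.0, evecs, T, 1.0, Aevec ); // Aevec = A*evecs - evecs*T
  normR.resize( numVecs );
  MVT::MvNorm( Aevec, normR );                        // column-wise 2-norms
}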
void build_simple_matrix(
  Epetra_Comm &comm,        // Communicator to use
  Epetra_CrsMatrix *&A,     // OUTPUT:  Matrix returned
  itype nGlobalRows,        // Number of global matrix rows and columns
  bool testEpetra64,        // if true, add 2*INT_MAX to each global ID
                            // to exercise Epetra64
  bool verbose              // if true, print out matrix information
)
{
  Epetra_Map *rowMap = NULL;    // Row map for the created matrix
  Epetra_Map *colMap = NULL;    // Col map for the created matrix
  Epetra_Map *vectorMap = NULL; // Range/Domain map for the created matrix
  long long offsetEpetra64;

  build_maps(nGlobalRows, testEpetra64, comm,
             &vectorMap, &rowMap, &colMap, offsetEpetra64, verbose);

  // Create an integer vector nnzPerRow that is used to build the Epetra Matrix.
  // nnzPerRow[i] is the number of entries for the ith global equation
  int nMyRows = rowMap->NumMyElements();
  std::vector<int> nnzPerRow(nMyRows+1, 0);

  // Also create lists of the nonzeros to be assigned to processors.
  // To save programming time and complexity, these vectors are allocated
  // bigger than they may actually be needed.
  std::vector<itype> iv(3*nMyRows+1);
  std::vector<itype> jv(3*nMyRows+1);
  std::vector<double> vv(3*nMyRows+1);

  itype nMyNonzeros = 0;
  for (itype i = 0, myrowcnt = 0; i < nGlobalRows; i++) {
    if (rowMap->MyGID(i+offsetEpetra64)) {
      // This processor owns part of this row; see whether it owns the nonzeros
      if (i > 0 && (!colMap || colMap->MyGID(i-1+offsetEpetra64))) {
        iv[nMyNonzeros] = i + offsetEpetra64;
        jv[nMyNonzeros] = i-1 + offsetEpetra64;
        vv[nMyNonzeros] = -1;
        nMyNonzeros++;
        nnzPerRow[myrowcnt]++;
      }
      if (!colMap || colMap->MyGID(i+offsetEpetra64)) {
        iv[nMyNonzeros] = i + offsetEpetra64;
        jv[nMyNonzeros] = i + offsetEpetra64;
        vv[nMyNonzeros] = ((i == 0 || i == nGlobalRows-1) ? 1. : 2.);
        nMyNonzeros++;
        nnzPerRow[myrowcnt]++;
      }
      if (i < nGlobalRows - 1 && (!colMap || colMap->MyGID(i+1+offsetEpetra64))) {
        iv[nMyNonzeros] = i + offsetEpetra64;
        jv[nMyNonzeros] = i+1 + offsetEpetra64;
        vv[nMyNonzeros] = -1;
        nMyNonzeros++;
        nnzPerRow[myrowcnt]++;
      }
      myrowcnt++;
    }
  }

  // Create an Epetra_CrsMatrix
  A = new Epetra_CrsMatrix(Copy, *rowMap, &nnzPerRow[0], true);

  int info;
  for (int sum = 0, i=0; i < nMyRows; i++) {
    if (nnzPerRow[i]) {
      info = A->InsertGlobalValues(iv[sum],nnzPerRow[i],&vv[sum],&jv[sum]);
      assert(info==0);
      sum += nnzPerRow[i];
    }
  }

  // Finish up
  if (vectorMap)
    info = A->FillComplete(*vectorMap, *vectorMap);
  else
    info = A->FillComplete();
  assert(info==0);
}
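//
//  A minimal usage sketch for build_simple_matrix (hypothetical call site):
//  build the [-1 2 -1] tridiagonal test matrix without the Epetra64 offset.
//
static void build_simple_matrix_example( Epetra_Comm &comm )
{
  Epetra_CrsMatrix *A = NULL;
  build_simple_matrix(comm, A, (itype) 100, false, false);
  if (comm.MyPID() == 0)
    std::cout << "Built tridiagonal matrix with "
              << A->NumGlobalNonzeros64() << " nonzeros" << std::endl;
  delete A;
}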
int main(int argc, char *argv[])
{
  // Initialize MPI
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
#endif

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm Comm;
#endif

  bool verbose = false;
  bool success = false;
  try {
    int globalLength = 100; // This should suffice

    if (argc > 1)
      if (argv[1][0]=='-' && argv[1][1]=='v')
        verbose = true;

    // Get the process ID and the total number of processors
    int MyPID = Comm.MyPID();

    // Set up the printing utilities
    Teuchos::RCP<Teuchos::ParameterList> noxParamsPtr =
      Teuchos::rcp(new Teuchos::ParameterList);
    Teuchos::ParameterList& noxParams = *(noxParamsPtr.get());
    // Only print output if the "-v" flag is set on the command line
    Teuchos::ParameterList& printParams = noxParams.sublist("Printing");
    printParams.set("MyPID", MyPID);
    printParams.set("Output Precision", 5);
    printParams.set("Output Processor", 0);
    if( verbose )
      printParams.set("Output Information",
                      NOX::Utils::OuterIteration +
                      NOX::Utils::OuterIterationStatusTest +
                      NOX::Utils::InnerIteration +
                      NOX::Utils::Parameters +
                      NOX::Utils::Details +
                      NOX::Utils::Warning +
                      NOX::Utils::TestDetails);
    else
      printParams.set("Output Information", NOX::Utils::Error);

    NOX::Utils printing(printParams);

    // Identify the test problem
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Starting epetra/NOX_Vector/NOX_Vector.exe" << std::endl;

    // Create a TestCompare class
    NOX::TestCompare tester( printing.out(), printing);
    double tolerance = 1.e-12;
    NOX::TestCompare::CompareType aComp = NOX::TestCompare::Absolute;

    // Identify processor information
#ifdef HAVE_MPI
    printing.out() << "Parallel Run" << std::endl;
    printing.out() << "Number of processors = " << Comm.NumProc() << std::endl;
    printing.out() << "Print Process = " << MyPID << std::endl;
    Comm.Barrier();
    if (printing.isPrintType(NOX::Utils::TestDetails))
      printing.out() << "Process " << MyPID << " is alive!" << std::endl;
    Comm.Barrier();
#else
    printing.out() << "Serial Run" << std::endl;
#endif

    // Create a map describing data distribution
    Epetra_Map * standardMap = new Epetra_Map(globalLength, 0, Comm);

    // Return value
    int status = 0;

    // *** Start Testing Here!!! ***

    // First create the Epetra_Vector needed to construct our NOX vector
    Epetra_Vector * epetraVec = new Epetra_Vector(*standardMap, true);

    NOX::Epetra::Vector * noxVec1 = new NOX::Epetra::Vector(*epetraVec, NOX::DeepCopy);
    delete epetraVec;
    epetraVec = 0;

    NOX::Epetra::Vector * noxVec2 = new NOX::Epetra::Vector(*noxVec1);
    noxVec2->init(1.0);

    // Test our norms
    NOX::Abstract::Vector::NormType
      oneNorm = NOX::Abstract::Vector::OneNorm,
      twoNorm = NOX::Abstract::Vector::TwoNorm,
      infNorm = NOX::Abstract::Vector::MaxNorm;

    double expectedOneNorm = (double) globalLength,
           expectedTwoNorm = sqrt( (double) globalLength),
           expectedInfNorm = 1.0;

    status += tester.testValue( noxVec2->norm(oneNorm), expectedOneNorm,
                                tolerance, "One-Norm Test", aComp);
    status += tester.testValue( noxVec2->norm(twoNorm), expectedTwoNorm,
                                tolerance, "Two-Norm Test", aComp);
    status += tester.testValue( noxVec2->norm(infNorm), expectedInfNorm,
                                tolerance, "Max-Norm Test", aComp);

    // Test random, reciprocal and dot methods
    noxVec1->random();
    // Threshold values since we want to do a reciprocal
    int myLength = standardMap->NumMyElements();
    for( int i = 0; i < myLength; ++i )
      if( fabs(noxVec1->getEpetraVector()[i]) < 1.e-8 )
        noxVec1->getEpetraVector()[i] = 1.e-8;

    noxVec2->reciprocal(*noxVec1);
    double product = noxVec1->innerProduct(*noxVec2);

    status += tester.testValue( product, expectedOneNorm,
                                tolerance, "Random, Reciprocal and Dot Test", aComp);

    // Test abs and weighted-norm methods
    /*  ----------------------------
        NOT SUPPORTED AT THIS TIME
        ----------------------------
    noxVec2->abs(*noxVec2);
    double wNorm = noxVec1->norm(*noxVec2);
    status += tester.testValue( wNorm, noxVec1->norm(oneNorm),
                                tolerance, "Abs and Weighted-Norm Test", aComp);
    */

    // Test operator= , abs, update and scale methods
    (*noxVec2) = (*noxVec1);
    noxVec2->abs(*noxVec2);
    double sumAll = noxVec1->norm(oneNorm);
    noxVec2->update( 1.0, *noxVec1, 1.0 );
    noxVec2->scale(0.5);
    double sumPositive = noxVec2->norm(oneNorm);
    (*noxVec2) = (*noxVec1);
    noxVec2->abs(*noxVec2);
    noxVec2->update( 1.0, *noxVec1, -1.0 );
    noxVec2->scale(0.5);
    double sumNegative = noxVec2->norm(oneNorm);

    status += tester.testValue( (sumPositive + sumNegative), sumAll,
                                tolerance, "Abs, Operator= , Update and Scale Test", aComp);

    success = status==0;

    if (success)
      printing.out() << "Test passed!" << std::endl;
    else
      printing.out() << "Test failed!" << std::endl;

    delete noxVec2;
    delete noxVec1;
    delete standardMap;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
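//
//  A minimal sketch of the identity behind the "Random, Reciprocal and Dot"
//  test above: for an entrywise-nonzero x, x . (1 ./ x) = sum_i 1, which is
//  the global vector length (the expectedOneNorm used there).  ReciprocalDot
//  is a hypothetical helper.
//
static double ReciprocalDot( const NOX::Epetra::Vector& x )
{
  NOX::Epetra::Vector r(x);  // deep copy used as workspace
  r.reciprocal(x);           // r_i = 1 / x_i
  return x.innerProduct(r);  // = sum_i x_i * (1/x_i) = global length
}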
int Drumm2(const Epetra_Map& map, bool verbose)
{
  //Simple 2-element problem (element as in "finite-element") from
  //Clif Drumm. Two triangular elements, one per processor, as shown
  //here:
  //
  //   *----*
  //  3|\  2|
  //   | \  |
  //   | 0\1|
  //   |   \|
  //   *----*
  //  0    1
  //
  //Element 0 on processor 0, element 1 on processor 1.
  //Processor 0 will own nodes 0,1,3 and processor 1 will own node 2.
  //Each processor will pass a 3x3 element-connectivity-matrix to
  //Epetra_FECrsGraph.
  //After GlobalAssemble(), the graph should have the nonzero pattern of
  //the following matrix (the values shown are those the corresponding
  //assembled matrix would have; the graph stores only the structure):
  //
  //         row 0: 2 1 0 1
  //proc 0   row 1: 1 4 1 2
  //         row 2: 0 1 2 1
  //----------------------------------
  //proc 1   row 3: 1 2 1 4
  //

  int numProcs = map.Comm().NumProc();
  int localProc = map.Comm().MyPID();

  if (numProcs != 2) return(0);

  int indexBase = 0, ierr = 0;

  int numMyNodes = 3;
  long long* myNodes = new long long[numMyNodes];

  if (localProc == 0) {
    myNodes[0] = 0;
    myNodes[1] = 1;
    myNodes[2] = 3;
  }
  else {
    numMyNodes = 1;
    myNodes[0] = 2;
  }

  Epetra_Map Map((long long) -1, numMyNodes, myNodes, indexBase, map.Comm());

  int rowLengths = 3;
  Epetra_FECrsGraph A(Copy, Map, rowLengths);

  if (localProc != 0) {
    numMyNodes = 3;
    myNodes[0] = 1;
    myNodes[1] = 2;
    myNodes[2] = 3;
  }

  EPETRA_TEST_ERR( A.InsertGlobalIndices(numMyNodes, myNodes,
                                         numMyNodes, myNodes),ierr);

  EPETRA_TEST_ERR( A.GlobalAssemble(), ierr );

  if (verbose) {
    A.Print(std::cout);
  }

  delete [] myNodes;

  return(0);
}
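//
//  A minimal sketch (assuming a build with 64-bit global indices, matching
//  the long long IDs above) of the usual next step once the FECrsGraph is
//  assembled: construct an Epetra_FECrsMatrix on the fixed graph and sum an
//  element matrix into it.  FillOnGraph and elemValues are hypothetical.
//
static int FillOnGraph(const Epetra_FECrsGraph& G,
                       int numNodes, const long long* nodes,
                       const double* elemValues)
{
  int ierr = 0;
  Epetra_FECrsMatrix M(Copy, G); // structure is fixed by the assembled graph
  EPETRA_TEST_ERR( M.SumIntoGlobalValues(numNodes, nodes, numNodes, nodes,
                                         elemValues,
                                         Epetra_FECrsMatrix::ROW_MAJOR), ierr);
  EPETRA_TEST_ERR( M.GlobalAssemble(), ierr );
  return ierr;
}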
int Amesos_TestSolver( Epetra_Comm &Comm, char *matrix_file,
                       SparseSolverType SparseSolver,
                       bool transpose,
                       int special, AMESOS_MatrixType matrix_type )
{

  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA;
  Epetra_Vector * readx;
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;

  std::string FileName = matrix_file ;
  int FN_Size = FileName.size() ;
  std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size );
  std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size );
  bool NonContiguousMap = false;

  if ( LastFiveBytes == ".triU" ) {
    // Call routine to read in unsymmetric Triplet matrix
    NonContiguousMap = true;
    EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm,
                                                      readMap, readA, readx,
                                                      readb, readxexact, NonContiguousMap ) );
  } else {
    if ( LastFiveBytes == ".triS" ) {
      NonContiguousMap = true;
      // Call routine to read in symmetric Triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm,
                                                        readMap, readA, readx,
                                                        readb, readxexact, NonContiguousMap ) );
    } else {
      if ( LastFourBytes == ".mtx" ) {
        EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap,
                                                               readA, readx, readb, readxexact) );
      } else {
        // Call routine to read in HB problem
        Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx,
                                     readb, readxexact) ;
      }
    }
  }

  Epetra_CrsMatrix transposeA(Copy, *readMap, 0);
  Epetra_CrsMatrix *serialA ;

  if ( transpose ) {
    assert( CrsMatrixTranspose( readA, &transposeA ) == 0 );
    serialA = &transposeA ;
  } else {
    serialA = readA ;
  }

  Epetra_RowMatrix * passA = 0;
  Epetra_Vector * passx = 0;
  Epetra_Vector * passb = 0;
  Epetra_Vector * passxexact = 0;
  Epetra_Vector * passresid = 0;
  Epetra_Vector * passtmp = 0;

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);
  Epetra_Map* map_;

  if( NonContiguousMap ) {
    //
    //  map gives us NumMyElements and MyFirstElement;
    //
    int NumGlobalElements =  readMap->NumGlobalElements();
    int NumMyElements = map.NumMyElements();
    int MyFirstElement = map.MinMyGID();
    std::vector<int> MapMap_( NumGlobalElements );
    readMap->MyGlobalElements( &MapMap_[0] ) ;
    Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ;
    map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm);
  } else {
    map_ = new Epetra_Map( map ) ;
  }

  Epetra_CrsMatrix A(Copy, *map_, 0);

  const Epetra_Map &OriginalMap = serialA->RowMatrixRowMap() ;
  assert( OriginalMap.SameAs(*readMap) );
  Epetra_Export exporter(OriginalMap, *map_);
  Epetra_Export exporter2(OriginalMap, *map_);
  Epetra_Export MatrixExporter(OriginalMap, *map_);
  Epetra_CrsMatrix AwithDiag(Copy, *map_, 0);

  Epetra_Vector x(*map_);
  Epetra_Vector b(*map_);
  Epetra_Vector xexact(*map_);
  Epetra_Vector resid(*map_);
  Epetra_Vector readresid(*readMap);
  Epetra_Vector tmp(*map_);
  Epetra_Vector readtmp(*readMap);

  //  Epetra_Vector xcomp(*map_);      // X as computed by the solver
  bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ;
  if ( distribute_matrix ) {
    // Create Exporter to distribute read-in matrix and vectors
    //
    //  Initialize x, b and xexact to the values read in from the file
    //
    x.Export(*readx, exporter, Add);
    b.Export(*readb, exporter, Add);
    xexact.Export(*readxexact, exporter, Add);
    Comm.Barrier();

    A.Export(*serialA, exporter, Add);
    assert(A.FillComplete()==0);

    Comm.Barrier();

    passA = &A;
    passx = &x;
    passb = &b;
    passxexact = &xexact;
    passresid = &resid;
    passtmp = &tmp;
  } else {
    passA = serialA;
    passx = readx;
    passb = readb;
    passxexact = readxexact;
    passresid = &readresid;
    passtmp = &readtmp;
  }

  Epetra_MultiVector CopyB( *passb ) ;

  double Anorm = passA->NormInf() ;
  SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ;

  Epetra_LinearProblem Problem(  (Epetra_RowMatrix *) passA,
                                 (Epetra_MultiVector *) passx,
                                 (Epetra_MultiVector *) passb );

  for ( int i = 0; i < 1+special ; i++ ) {
    Epetra_Time TotalTime( Comm ) ;

    if ( false ) {
      //  TEST_UMFPACK is never set by configure
#ifdef HAVE_AMESOS_SUPERLUDIST
    } else if ( SparseSolver == SUPERLUDIST ) {
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );
      Amesos_Superludist A_Superludist( Problem ) ;

      //ParamList.set( "Redistribute", true );
      //ParamList.set( "AddZeroToDiag", true );
      Teuchos::ParameterList& SuperludistParams = ParamList.sublist("Superludist") ;
      ParamList.set( "MaxProcs", -3 );

      EPETRA_CHK_ERR( A_Superludist.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_Superludist.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_Superludist.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_Superludist.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_Superludist.Solve( ) );
#endif
#ifdef HAVE_AMESOS_DSCPACK
    } else if ( SparseSolver == DSCPACK ) {
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );

      Amesos_Dscpack A_dscpack( Problem ) ;
      EPETRA_CHK_ERR( A_dscpack.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_dscpack.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_dscpack.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_dscpack.Solve( ) );
#endif
#ifdef HAVE_AMESOS_SCALAPACK
    } else if ( SparseSolver == SCALAPACK ) {
      Teuchos::ParameterList ParamList ;
      Amesos_Scalapack A_scalapack( Problem ) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_scalapack.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_scalapack.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_scalapack.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_scalapack.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_scalapack.Solve( ) );
#endif
#ifdef HAVE_AMESOS_TAUCS
    } else if ( SparseSolver == TAUCS ) {
      Teuchos::ParameterList ParamList ;
      Amesos_Taucs A_taucs( Problem ) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_taucs.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_taucs.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_taucs.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_taucs.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_taucs.Solve( ) );
#endif
#ifdef HAVE_AMESOS_PARDISO
    } else if ( SparseSolver == PARDISO ) {
      Teuchos::ParameterList ParamList ;
      Amesos_Pardiso A_pardiso( Problem ) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_pardiso.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_pardiso.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_pardiso.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_pardiso.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_pardiso.Solve( ) );
#endif
#ifdef HAVE_AMESOS_PARAKLETE
    } else if ( SparseSolver == PARAKLETE ) {
      Teuchos::ParameterList ParamList ;
      Amesos_Paraklete A_paraklete( Problem ) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_paraklete.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_paraklete.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_paraklete.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_paraklete.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_paraklete.Solve( ) );
#endif
#ifdef HAVE_AMESOS_MUMPS
    } else if ( SparseSolver == MUMPS ) {
      Teuchos::ParameterList ParamList ;
      Amesos_Mumps A_mumps( Problem ) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_mumps.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_mumps.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_mumps.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_mumps.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_mumps.Solve( ) );
#endif
#ifdef HAVE_AMESOS_SUPERLU
    } else if ( SparseSolver == SUPERLU ) {
      Teuchos::ParameterList ParamList ;
      Amesos_Superlu A_superlu( Problem ) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_superlu.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_superlu.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_superlu.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_superlu.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_superlu.Solve( ) );
#endif
#ifdef HAVE_AMESOS_LAPACK
    } else if ( SparseSolver == LAPACK ) {
      Teuchos::ParameterList ParamList ;
      Amesos_Lapack A_lapack( Problem ) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_lapack.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_lapack.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_lapack.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_lapack.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_lapack.Solve( ) );
#endif
#ifdef HAVE_AMESOS_UMFPACK
    } else if ( SparseSolver == UMFPACK ) {
      Teuchos::ParameterList ParamList ;
      Amesos_Umfpack A_umfpack( Problem ) ;
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( A_umfpack.SetParameters( ParamList ) );
      EPETRA_CHK_ERR( A_umfpack.SetUseTranspose( transpose ) );
      EPETRA_CHK_ERR( A_umfpack.SymbolicFactorization( ) );
      EPETRA_CHK_ERR( A_umfpack.NumericFactorization( ) );
      EPETRA_CHK_ERR( A_umfpack.Solve( ) );
#endif
#ifdef HAVE_AMESOS_KLU
    } else if ( SparseSolver == KLU ) {
      using namespace Teuchos;

      Amesos_Time AT;
      int setupTimePtr = -1, symTimePtr = -1, numTimePtr = -1, refacTimePtr = -1, solveTimePtr = -1;
      AT.CreateTimer(Comm, 2);
      AT.ResetTimer(0);

      Teuchos::ParameterList ParamList ;
      // ParamList.set("OutputLevel",2);
      Amesos_Klu A_klu( Problem );
      ParamList.set( "MaxProcs", -3 );
      ParamList.set( "TrustMe", false );
      // ParamList.set( "Refactorize", true );
      EPETRA_CHK_ERR( A_klu.SetParameters( ParamList ) ) ;
      EPETRA_CHK_ERR( A_klu.SetUseTranspose( transpose ) );
      setupTimePtr = AT.AddTime("Setup", setupTimePtr, 0);
      EPETRA_CHK_ERR( A_klu.SymbolicFactorization( ) );
      symTimePtr = AT.AddTime("Symbolic", symTimePtr, 0);
      EPETRA_CHK_ERR( A_klu.NumericFactorization( ) );
      numTimePtr = AT.AddTime("Numeric", numTimePtr, 0);
      EPETRA_CHK_ERR( A_klu.NumericFactorization( ) );
      refacTimePtr = AT.AddTime("Refactor", refacTimePtr, 0);
      // for ( int i=0; i<100000 ; i++ )
      EPETRA_CHK_ERR( A_klu.Solve( ) );
      solveTimePtr = AT.AddTime("Solve", solveTimePtr, 0);

      double SetupTime = AT.GetTime(setupTimePtr);
      double SymbolicTime = AT.GetTime(symTimePtr);
      double NumericTime = AT.GetTime(numTimePtr);
      double RefactorTime = AT.GetTime(refacTimePtr);
      double SolveTime = AT.GetTime(solveTimePtr);

      std::cout << __FILE__ << "::" << __LINE__ << " SetupTime = " << SetupTime << std::endl ;
      std::cout << __FILE__ << "::" << __LINE__ << " SymbolicTime = " << SymbolicTime - SetupTime << std::endl ;
      std::cout << __FILE__ << "::" << __LINE__ << " NumericTime = " << NumericTime - SymbolicTime << std::endl ;
      std::cout << __FILE__ << "::" << __LINE__ << " RefactorTime = " << RefactorTime - NumericTime << std::endl ;
      std::cout << __FILE__ << "::" << __LINE__ << " SolveTime = " << SolveTime - RefactorTime << std::endl ;
#endif
    } else {
      SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
      std::cerr << "\n\n####################  Requested solver not available on this platform ##################### ATS\n" << std::endl ;
      std::cout << " SparseSolver = " << SparseSolver << std::endl ;
      std::cerr << " SparseSolver = " << SparseSolver << std::endl ;
    }

    SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() );
  }  // end for (int i=0; i<1+special; i++ )

  //
  //  Compute the error = norm(xcomp - xexact )
  //
  double error;
  passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

  passresid->Norm2(&error);
  SparseDirectTimingVars::SS_Result.Set_Error(error) ;

  //  passxexact->Norm2(&error ) ;
  //  passx->Norm2(&error ) ;

  //
  //  Compute the residual = norm(Ax - b)
  //
  double residual ;

  passA->Multiply( transpose, *passx, *passtmp);
  passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0);
  //  passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0);
  passresid->Norm2(&residual);
  SparseDirectTimingVars::SS_Result.Set_Residual(residual) ;

  double bnorm;
  passb->Norm2( &bnorm ) ;
  SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm) ;

  double xnorm;
  passx->Norm2( &xnorm ) ;
  SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm) ;

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;

  Comm.Barrier();

  return 0;
}
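//
//  A minimal usage sketch for Amesos_TestSolver (hypothetical call site):
//  solve one Harwell-Boeing problem with KLU on a distributed matrix,
//  without the transpose and without extra timing repetitions.
//
static int Amesos_TestSolver_example( Epetra_Comm &Comm )
{
  return Amesos_TestSolver( Comm, (char *) "../HBMatrices/fidap005.rua",
                            KLU,
                            false,                 // transpose
                            0,                     // special: no repeat loops
                            AMESOS_Distributed );  // matrix_type
}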