//============================================================================== int Ifpack_ML:: ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { if (IsComputed() == false) IFPACK_CHK_ERR(-1); int numVectors = X.NumVectors(); if (numVectors != Y.NumVectors()) IFPACK_CHK_ERR(-1); // wrong input Time_->ResetStartTime(); // AztecOO gives X and Y pointing to the same memory location, // need to create an auxiliary vector, Xcopy Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]) Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); else Xcopy = Teuchos::rcp( &X, false ); for (int i=0; i<numVectors; i++) { IFPACK_CHK_ERR(MLPrec_->ApplyInverse(*Xcopy,Y)); } ++NumApplyInverse_; ApplyInverseTime_ += Time_->ElapsedTime(); return(0); }
//======================================================= int EpetraExt_HypreIJMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { //printf("Proc[%d], Row start: %d, Row End: %d\n", Comm().MyPID(), MyRowStart_, MyRowEnd_); bool SameVectors = false; int NumVectors = X.NumVectors(); if (NumVectors != Y.NumVectors()) return -1; // X and Y must have same number of vectors if(X.Pointers() == Y.Pointers()){ SameVectors = true; } for(int VecNum = 0; VecNum < NumVectors; VecNum++) { //Get values for current vector in multivector. double * x_values; double * y_values; EPETRA_CHK_ERR((*X(VecNum)).ExtractView(&x_values)); double *x_temp = x_local->data; double *y_temp = y_local->data; if(!SameVectors){ EPETRA_CHK_ERR((*Y(VecNum)).ExtractView(&y_values)); } else { y_values = new double[X.MyLength()]; } y_local->data = y_values; EPETRA_CHK_ERR(HYPRE_ParVectorSetConstantValues(par_y,0.0)); // Temporarily make a pointer to data in Hypre for end // Replace data in Hypre vectors with epetra values x_local->data = x_values; // Do actual computation. if(TransA) { // Use transpose of A in multiply EPETRA_CHK_ERR(HYPRE_ParCSRMatrixMatvecT(1.0, ParMatrix_, par_x, 1.0, par_y)); } else { EPETRA_CHK_ERR(HYPRE_ParCSRMatrixMatvec(1.0, ParMatrix_, par_x, 1.0, par_y)); } if(SameVectors){ int NumEntries = Y.MyLength(); std::vector<double> new_values; new_values.resize(NumEntries); std::vector<int> new_indices; new_indices.resize(NumEntries); for(int i = 0; i < NumEntries; i++){ new_values[i] = y_values[i]; new_indices[i] = i; } EPETRA_CHK_ERR((*Y(VecNum)).ReplaceMyValues(NumEntries, &new_values[0], &new_indices[0])); delete[] y_values; } x_local->data = x_temp; y_local->data = y_temp; } double flops = (double) NumVectors * (double) NumGlobalNonzeros(); UpdateFlops(flops); return 0; } //Multiply()
//============================================================================== int Ifpack_Hypre::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{ if(IsInitialized() == false){ IFPACK_CHK_ERR(-1); } bool SameVectors = false; int NumVectors = X.NumVectors(); if (NumVectors != Y.NumVectors()) IFPACK_CHK_ERR(-1); // X and Y must have same number of vectors if(X.Pointers() == Y.Pointers()){ SameVectors = true; } for(int VecNum = 0; VecNum < NumVectors; VecNum++) { //Get values for current vector in multivector. double * XValues; double * YValues; IFPACK_CHK_ERR((*X(VecNum)).ExtractView(&XValues)); double *XTemp = XLocal_->data; double *YTemp = YLocal_->data; if(!SameVectors){ IFPACK_CHK_ERR((*Y(VecNum)).ExtractView(&YValues)); } else { YValues = new double[X.MyLength()]; } YLocal_->data = YValues; IFPACK_CHK_ERR(HYPRE_ParVectorSetConstantValues(ParY_,0.0)); // Temporarily make a pointer to data in Hypre for end // Replace data in Hypre vectors with epetra values XLocal_->data = XValues; // Do actual computation. if(TransA) { // Use transpose of A in multiply IFPACK_CHK_ERR(HYPRE_ParCSRMatrixMatvecT(1.0, ParMatrix_, ParX_, 1.0, ParY_)); } else { IFPACK_CHK_ERR(HYPRE_ParCSRMatrixMatvec(1.0, ParMatrix_, ParX_, 1.0, ParY_)); } if(SameVectors){ int NumEntries = Y.MyLength(); std::vector<double> new_values; new_values.resize(NumEntries); std::vector<int> new_indices; new_indices.resize(NumEntries); for(int i = 0; i < NumEntries; i++){ new_values[i] = YValues[i]; new_indices[i] = i; } IFPACK_CHK_ERR((*Y(VecNum)).ReplaceMyValues(NumEntries, &new_values[0], &new_indices[0])); delete[] YValues; } XLocal_->data = XTemp; YLocal_->data = YTemp; } return 0; } //Multiply()
int Ifpack_IHSS::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{ if(!IsComputed_) return -1; Time_.ResetStartTime(); bool initial_guess_is_zero=false; // AztecOO gives X and Y pointing to the same memory location, // need to create an auxiliary vector, Xcopy Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy; Epetra_MultiVector Temp(X); if (X.Pointers()[0] == Y.Pointers()[0]){ Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); // Since the user didn't give us anything better, our initial guess is zero. Y.Scale(0.0); initial_guess_is_zero=true; } else Xcopy = Teuchos::rcp( &X, false ); Epetra_MultiVector T1(Y),T2(*Xcopy); // Note: Since Aherm and Askew are actually (aI+H) and (aI+S) accordingly (to save memory), // the application thereof needs to be a little different. // The published algorithm is: // temp = (aI+H)^{-1} [ (aI-S) y + x ] // y = (aI+S)^{-1} [ (aI-H) temp + x ] // // But we're doing: // temp = (aI+H)^{-1} [ 2a y - Shat y + x ] // y = (aI+S)^{-1} [ 2 a temp - Hhat temp + x ] for(int i=0;i<NumSweeps_;i++){ // temp = (aI+H)^{-1} [ 2a y - Shat y + x ] if(!initial_guess_is_zero || i >0 ){ Askew_->Apply(Y,T1); T2.Update(2*Alpha_,Y,-1,T1,1); } Pherm_->ApplyInverse(T2,Y); // y = (aI+S)^{-1} [ 2 a temp - Hhat temp + x ] Aherm_->Apply(Y,T1); T2.Scale(1.0,*Xcopy); T2.Update(2*Alpha_,Y,-1,T1,1.0); Pskew_->ApplyInverse(T2,Y); } // Counter update NumApplyInverse_++; ApplyInverseTime_ += Time_.ElapsedTime(); return 0; }
//============================================================================== // Note that Ifpack_PointRelaxation and Jacobi is much faster than // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the // way the matrix-vector produce is performed). // // Another ML-related observation is that the starting solution (in Y) // is NOT supposed to be zero. This may slow down the application of just // one sweep of Jacobi. // int Ifpack_PointRelaxation:: ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { if (!IsComputed()) IFPACK_CHK_ERR(-3); if (X.NumVectors() != Y.NumVectors()) IFPACK_CHK_ERR(-2); Time_->ResetStartTime(); // AztecOO gives X and Y pointing to the same memory location, // need to create an auxiliary vector, Xcopy Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]) Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); else Xcopy = Teuchos::rcp( &X, false ); if (ZeroStartingSolution_) Y.PutScalar(0.0); // Flops are updated in each of the following. switch (PrecType_) { case IFPACK_JACOBI: IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y)); break; case IFPACK_GS: IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y)); break; case IFPACK_SGS: IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y)); break; default: IFPACK_CHK_ERR(-1); // something wrong } ++NumApplyInverse_; ApplyInverseTime_ += Time_->ElapsedTime(); return(0); }
//============================================================================= // This function finds Y such that LDU Y = X or U(trans) D L(trans) Y = X for multiple RHS int Ifpack_IC::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { if (!IsComputed()) IFPACK_CHK_ERR(-3); // compute preconditioner first if (X.NumVectors() != Y.NumVectors()) IFPACK_CHK_ERR(-2); // Return error: X and Y not the same size Time_.ResetStartTime(); bool Upper = true; bool UnitDiagonal = true; // AztecOO gives X and Y pointing to the same memory location, // need to create an auxiliary vector, Xcopy RefCountPtr< const Epetra_MultiVector > Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]) Xcopy = rcp( new Epetra_MultiVector(X) ); else Xcopy = rcp( &X, false ); U_->Solve(Upper, true, UnitDiagonal, *Xcopy, Y); Y.Multiply(1.0, *D_, Y, 0.0); // y = D*y (D_ has inverse of diagonal) U_->Solve(Upper, false, UnitDiagonal, Y, Y); // Solve Uy = y #ifdef IFPACK_FLOPCOUNTERS ApplyInverseFlops_ += 4.0 * U_->NumGlobalNonzeros(); ApplyInverseFlops_ += D_->GlobalLength(); #endif ++NumApplyInverse_; ApplyInverseTime_ += Time_.ElapsedTime(); return(0); }
//============================================================================= int Ifpack_ICT::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { if (!IsComputed()) IFPACK_CHK_ERR(-3); // compute preconditioner first if (X.NumVectors() != Y.NumVectors()) IFPACK_CHK_ERR(-2); // Return error: X and Y not the same size Time_.ResetStartTime(); // AztecOO gives X and Y pointing to the same memory location, // need to create an auxiliary vector, Xcopy Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]) Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); else Xcopy = Teuchos::rcp( &X, false ); // NOTE: H_ is based on SerialMap_, while Xcopy is based // on A.Map()... which are in general different. However, Solve() // does not seem to care... which is fine with me. // EPETRA_CHK_ERR(H_->Solve(false,false,false,*Xcopy,Y)); EPETRA_CHK_ERR(H_->Solve(false,true,false,Y,Y)); #ifdef IFPACK_FLOPCOUNTERS // these are global flop count ApplyInverseFlops_ += 4.0 * GlobalNonzeros_; #endif ++NumApplyInverse_; ApplyInverseTime_ += Time_.ElapsedTime(); return(0); }
//============================================================================== int Ifpack_Hypre::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{ if(IsComputed() == false){ IFPACK_CHK_ERR(-1); } // These are hypre requirements // hypre needs A, X, and Y to have the same contiguous distribution // NOTE: Maps are only considered to be contiguous if they were generated using a // particular constructor. Otherwise, LinearMap() will not detect whether they are // actually contiguous. if(!X.Map().LinearMap() || !Y.Map().LinearMap()) { std::cerr << "ERROR: X and Y must have contiguous maps.\n"; IFPACK_CHK_ERR(-1); } if(!X.Map().PointSameAs(*MySimpleMap_) || !Y.Map().PointSameAs(*MySimpleMap_)) { std::cerr << "ERROR: X, Y, and A must have the same distribution.\n"; IFPACK_CHK_ERR(-1); } Time_.ResetStartTime(); bool SameVectors = false; int NumVectors = X.NumVectors(); if (NumVectors != Y.NumVectors()) IFPACK_CHK_ERR(-1); // X and Y must have same number of vectors if(X.Pointers() == Y.Pointers()){ SameVectors = true; } for(int VecNum = 0; VecNum < NumVectors; VecNum++) { //Get values for current vector in multivector. // FIXME amk Nov 23, 2015: This will not work for funky data layouts double * XValues; IFPACK_CHK_ERR((*X(VecNum)).ExtractView(&XValues)); double * YValues; if(!SameVectors){ IFPACK_CHK_ERR((*Y(VecNum)).ExtractView(&YValues)); } else { YValues = new double[X.MyLength()]; } // Temporarily make a pointer to data in Hypre for end double *XTemp = XLocal_->data; // Replace data in Hypre vectors with epetra values XLocal_->data = XValues; double *YTemp = YLocal_->data; YLocal_->data = YValues; IFPACK_CHK_ERR(HYPRE_ParVectorSetConstantValues(ParY_, 0.0)); if(SolveOrPrec_ == Solver){ // Use the solver methods IFPACK_CHK_ERR(SolverSolvePtr_(Solver_, ParMatrix_, ParX_, ParY_)); } else { // Apply the preconditioner IFPACK_CHK_ERR(PrecondSolvePtr_(Preconditioner_, ParMatrix_, ParX_, ParY_)); } if(SameVectors){ int NumEntries = Y.MyLength(); std::vector<double> new_values; new_values.resize(NumEntries); std::vector<int> new_indices; new_indices.resize(NumEntries); for(int i = 0; i < NumEntries; i++){ new_values[i] = YValues[i]; new_indices[i] = i; } IFPACK_CHK_ERR((*Y(VecNum)).ReplaceMyValues(NumEntries, &new_values[0], &new_indices[0])); delete[] YValues; } XLocal_->data = XTemp; YLocal_->data = YTemp; } NumApplyInverse_ = NumApplyInverse_ + 1; ApplyInverseTime_ = ApplyInverseTime_ + Time_.ElapsedTime(); return 0; } //ApplyInverse()
int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{ if(!IsComputed_) return -1; Time_.ResetStartTime(); bool initial_guess_is_zero=false; const int lclNumRows = W_->NumMyRows(); const int NumVectors = X.NumVectors(); Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors); double omega=GetOmega(); // need to create an auxiliary vector, Xcopy Teuchos::RCP<const Epetra_MultiVector> Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]){ Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); // Since the user didn't give us anything better, our initial guess is zero. Y.Scale(0.0); initial_guess_is_zero=true; } else Xcopy = Teuchos::rcp( &X, false ); Teuchos::RCP< Epetra_MultiVector > T2; // Note: Assuming that the matrix has an importer. Ifpack_PointRelaxation doesn't do this, but given that // I have a CrsMatrix, I'm probably OK. // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine // if things are on or off processor. // Note: T2 must be zero'd out if (IsParallel_ && W_->Importer()) T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true)); else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true)); // Pointer grabs int* rowptr,*colind; double *values; double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr; T2->ExtractView(&t2_ptr); Y.ExtractView(&y_ptr); Temp.ExtractView(&t_ptr); Xcopy->ExtractView(&x_ptr); Wdiag_->ExtractView(&d_ptr); IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values)); for(int i=0; i<NumSweeps_; i++){ // Calculate b-Ax if(!initial_guess_is_zero || i > 0) { A_->Apply(Y,Temp); Temp.Update(1.0,*Xcopy,-1.0); } else Temp.Update(1.0,*Xcopy,0.0); // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated // in this sweep before they are used, so we don't need to reset T2 to zero here. // Do backsolve & update // x = x + W^{-1} (b - A x) for(int j=0; j<lclNumRows; j++){ double diag=d_ptr[j]; for (int m=0 ; m<NumVectors; m++) { double dtmp=0.0; // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute. t2_ptr[m][j]=0.0; for(int k=rowptr[j];k<rowptr[j+1];k++){ dtmp+= values[k]*t2_ptr[m][colind[k]]; } // Yes, we need to update both of these. t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag; y_ptr[m][j] += omega*t2_ptr[m][j]; } } } // Counter update NumApplyInverse_++; ApplyInverseTime_ += Time_.ElapsedTime(); return 0; }
// ================================================ ====== ==== ==== == = //! Apply the preconditioner to an Epetra_MultiVector X, puts the result in Y int ML_Epetra::FaceMatrixFreePreconditioner::ApplyInverse(const Epetra_MultiVector& B_, Epetra_MultiVector& X) const{ const Epetra_MultiVector *B; Epetra_MultiVector *Bcopy=0; /* Sanity Checks */ int NumVectors=B_.NumVectors(); if (!B_.Map().SameAs(*FaceDomainMap_)) ML_CHK_ERR(-1); if (NumVectors != X.NumVectors()) ML_CHK_ERR(-1); Epetra_MultiVector r_edge(*FaceDomainMap_,NumVectors,false); Epetra_MultiVector e_edge(*FaceDomainMap_,NumVectors,false); Epetra_MultiVector e_node(*CoarseMap_,NumVectors,false); Epetra_MultiVector r_node(*CoarseMap_,NumVectors,false); /* Deal with the B==X case */ if (B_.Pointers()[0] == X.Pointers()[0]){ Bcopy=new Epetra_MultiVector(B_); B=Bcopy; X.PutScalar(0.0); } else B=&B_; for(int i=0;i<num_cycles;i++){ /* Pre-smoothing */ #ifdef HAVE_ML_IFPACK if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X)); #endif if(MaxLevels > 0){ if(i != 0 #ifdef HAVE_ML_IFPACK || Smoother_ #endif ){ /* Calculate Residual (r_e = b - (S+M+Addon) * x) */ ML_CHK_ERR(Operator_->Apply(X,r_edge)); ML_CHK_ERR(r_edge.Update(1.0,*B,-1.0)); /* Xfer to coarse grid (r_n = P' * r_e) */ ML_CHK_ERR(Prolongator_->Multiply(true,r_edge,r_node)); } else{ /* Xfer to coarse grid (r_n = P' * r_e) */ ML_CHK_ERR(Prolongator_->Multiply(true,*B,r_node)); } /* AMG on coarse grid (e_n = (CoarseMatrix)^{-1} r_n) */ ML_CHK_ERR(CoarsePC->ApplyInverse(r_node,e_node)); /* Xfer back to fine grid (e_e = P * e_n) */ ML_CHK_ERR(Prolongator_->Multiply(false,e_node,e_edge)); /* Add in correction (x = x + e_e) */ ML_CHK_ERR(X.Update(1.0,e_edge,1.0)); }/*end if*/ /* Post-Smoothing */ #ifdef HAVE_ML_IFPACK if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X)); #endif }/*end for*/ /* Cleanup */ if(Bcopy) delete Bcopy; return 0; }/*end ApplyInverse*/
//============================================================================= int Epetra_FastCrsMatrix::Solve(bool Upper, bool Trans, bool UnitDiagonal, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { // // This function find Y such that LY = X or UY = X or the transpose cases. // if (X.NumVectors()==1 && Y.NumVectors()==1) { double * xp = (double *) X[0]; double * yp = (double *) Y[0]; Epetra_Vector x(View, X.Map(), xp); Epetra_Vector y(View, Y.Map(), yp); return(Solve(Upper, Trans, UnitDiagonal, x, y)); } if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled. if ((Upper) && (!UpperTriangular())) EPETRA_CHK_ERR(-2); if ((!Upper) && (!LowerTriangular())) EPETRA_CHK_ERR(-3); if ((!UnitDiagonal) && (NoDiagonal())) EPETRA_CHK_ERR(-4); // If matrix has no diagonal, we must use UnitDiagonal if ((!UnitDiagonal) && (NumMyDiagonals()<NumMyRows_)) EPETRA_CHK_ERR(-5); // Need each row to have a diagonal int i, j, j0, k; int * NumEntriesPerRow = NumEntriesPerRow_; int ** Indices = Indices_; double ** Values = Values_; double diag; // If upper, point to last row if ((Upper && !Trans) || (!Upper && Trans)) { NumEntriesPerRow += NumMyRows_-1; Indices += NumMyRows_-1; Values += NumMyRows_-1; } double **Xp = (double**)X.Pointers(); double **Yp = (double**)Y.Pointers(); int NumVectors = X.NumVectors(); if (!Trans) { if (Upper) { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=NumMyRows_-1; i >=0; i--) { int NumEntries = *NumEntriesPerRow--; int * RowIndices = *Indices--; double * RowValues = *Values--; if (!UnitDiagonal) diag = 1.0/RowValues[0]; // Take inverse of diagonal once for later use for (k=0; k<NumVectors; k++) { double sum = 0.0; for (j=j0; j < NumEntries; j++) sum += RowValues[j] * Yp[k][RowIndices[j]]; if (UnitDiagonal) Yp[k][i] = Xp[k][i] - sum; else Yp[k][i] = (Xp[k][i] - sum)*diag; } } } else { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++ - j0; int * RowIndices = *Indices++; double * RowValues = *Values++; if (!UnitDiagonal) diag = 1.0/RowValues[NumEntries]; // Take inverse of diagonal once for later use for (k=0; k<NumVectors; k++) { double sum = 0.0; for (j=0; j < NumEntries; j++) sum += RowValues[j] * Yp[k][RowIndices[j]]; if (UnitDiagonal) Yp[k][i] = Xp[k][i] - sum; else Yp[k][i] = (Xp[k][i] - sum)*diag; } } } } // *********** Transpose case ******************************* else { for (k=0; k<NumVectors; k++) if (Yp[k]!=Xp[k]) for (i=0; i < NumMyRows_; i++) Yp[k][i] = Xp[k][i]; // Initialize y for transpose multiply if (Upper) { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; if (!UnitDiagonal) diag = 1.0/RowValues[j0]; // Take inverse of diagonal once for later use for (k=0; k<NumVectors; k++) { if (!UnitDiagonal) Yp[k][i] = Yp[k][i]*diag; for (j=j0; j < NumEntries; j++) Yp[k][RowIndices[j]] -= RowValues[j] * Yp[k][i]; } } } else { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=NumMyRows_-1; i>=0; i--) { int NumEntries = *NumEntriesPerRow-- - j0; int * RowIndices = *Indices--; double * RowValues = *Values--; for (k=0; k<NumVectors; k++) { if (!UnitDiagonal) Yp[k][i] = Yp[k][i]/Xp[k][i]; for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] -= RowValues[j] * Yp[k][i]; } } } } UpdateFlops(2*NumVectors*NumGlobalNonzeros64()); return(0); }
//============================================================================= int Epetra_FastCrsMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { // // This function forms the product Y = A * Y or Y = A' * X // if (X.NumVectors()==1 && Y.NumVectors()==1) { double * xp = (double *) X[0]; double * yp = (double *) Y[0]; Epetra_Vector x(View, X.Map(), xp); Epetra_Vector y(View, Y.Map(), yp); return(Multiply(TransA, x, y)); } if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled. int i, j, k; int * NumEntriesPerRow = NumEntriesPerRow_; int ** Indices = Indices_; double ** Values = Values_; double **Xp = (double**)X.Pointers(); double **Yp = (double**)Y.Pointers(); int NumVectors = X.NumVectors(); int NumMyCols_ = NumMyCols(); // Need to better manage the Import and Export vectors: // - Need accessor functions // - Need to make the NumVector match (use a View to do this) // - Need to look at RightScale and ColSum routines too. if (!TransA) { // If we have a non-trivial importer, we must import elements that are permuted or are on other processors if (Importer()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed ImportVector_->Import(X, *Importer(), Insert); Xp = (double**)ImportVector_->Pointers(); } // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors if (Exporter()!=0) { if (ExportVector_!=0) { if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;} } if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed Yp = (double**)ExportVector_->Pointers(); } // Do actual computation for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; for (k=0; k<NumVectors; k++) { double sum = 0.0; for (j=0; j < NumEntries; j++) sum += RowValues[j] * Xp[k][RowIndices[j]]; Yp[k][i] = sum; } } if (Exporter()!=0) Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector } else { // Transpose operation // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors if (Exporter()!=0) { if (ExportVector_!=0) { if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;} } if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed ExportVector_->Import(X, *Exporter(), Insert); Xp = (double**)ExportVector_->Pointers(); } // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors if (Importer()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed Yp = (double**)ImportVector_->Pointers(); } // Do actual computation for (k=0; k<NumVectors; k++) for (i=0; i < NumMyCols_; i++) Yp[k][i] = 0.0; // Initialize y for transpose multiply for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; for (k=0; k<NumVectors; k++) { for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] += RowValues[j] * Xp[k][i]; } } if (Importer()!=0) Y.Export(*ImportVector_, *Importer(), Add); // Fill Y with Values from export vector } UpdateFlops(2*NumVectors*NumGlobalNonzeros64()); return(0); }
//============================================================================== int Ifpack_Chebyshev:: ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { if (!IsComputed()) IFPACK_CHK_ERR(-3); if (PolyDegree_ == 0) return 0; int nVec = X.NumVectors(); int len = X.MyLength(); if (nVec != Y.NumVectors()) IFPACK_CHK_ERR(-2); Time_->ResetStartTime(); // AztecOO gives X and Y pointing to the same memory location, // need to create an auxiliary vector, Xcopy Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]) Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); else Xcopy = Teuchos::rcp( &X, false ); double **xPtr = 0, **yPtr = 0; Xcopy->ExtractView(&xPtr); Y.ExtractView(&yPtr); #ifdef HAVE_IFPACK_EPETRAEXT EpetraExt_PointToBlockDiagPermute* IBD=0; if (UseBlockMode_) IBD=&*InvBlockDiagonal_; #endif //--- Do a quick solve when the matrix is identity double *invDiag=0; if(!UseBlockMode_) invDiag=InvDiagonal_->Values(); if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_)) { #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) IBD->ApplyInverse(*Xcopy,Y); else #endif if (nVec == 1) { double *yPointer = yPtr[0], *xPointer = xPtr[0]; for (int i = 0; i < len; ++i) yPointer[i] = xPointer[i]*invDiag[i]; } else { int i, k; for (i = 0; i < len; ++i) { double coeff = invDiag[i]; for (k = 0; k < nVec; ++k) yPtr[k][i] = xPtr[k][i] * coeff; } } // if (nVec == 1) return 0; } // if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_)) //--- Initialize coefficients // Note that delta stores the inverse of ML_Cheby::delta double alpha = LambdaMax_ / EigRatio_; double beta = 1.1 * LambdaMax_; double delta = 2.0 / (beta - alpha); double theta = 0.5 * (beta + alpha); double s1 = theta * delta; //--- Define vectors // In ML_Cheby, V corresponds to pAux and W to dk Epetra_MultiVector V(X); Epetra_MultiVector W(X); #ifdef HAVE_IFPACK_EPETRAEXT Epetra_MultiVector Temp(X); #endif double *vPointer = V.Values(), *wPointer = W.Values(); double oneOverTheta = 1.0/theta; int i, j, k; //--- If solving normal equations, multiply RHS by A^T if(SolveNormalEquations_){ Apply_Transpose(Operator_,Y,V); Y=V; } // Do the smoothing when block scaling is turned OFF // --- Treat the initial guess if (ZeroStartingSolution_ == false) { Operator_->Apply(Y, V); // Compute W = invDiag * ( X - V )/ Theta #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) { Temp.Update(oneOverTheta,X,-oneOverTheta,V,0.0); IBD->ApplyInverse(Temp,W); // Perform additional matvecs for normal equations // CMS: Testing this only in block mode FOR NOW if(SolveNormalEquations_){ IBD->ApplyInverse(W,Temp); Apply_Transpose(Operator_,Temp,W); } } else #endif if (nVec == 1) { double *xPointer = xPtr[0]; for (i = 0; i < len; ++i) wPointer[i] = invDiag[i] * (xPointer[i] - vPointer[i]) * oneOverTheta; } else { for (i = 0; i < len; ++i) { double coeff = invDiag[i]*oneOverTheta; double *wi = wPointer + i, *vi = vPointer + i; for (k = 0; k < nVec; ++k) { *wi = (xPtr[k][i] - (*vi)) * coeff; wi = wi + len; vi = vi + len; } } } // if (nVec == 1) // Update the vector Y Y.Update(1.0, W, 1.0); } else { // Compute W = invDiag * X / Theta #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) { IBD->ApplyInverse(X,W); // Perform additional matvecs for normal equations // CMS: Testing this only in block mode FOR NOW if(SolveNormalEquations_){ IBD->ApplyInverse(W,Temp); Apply_Transpose(Operator_,Temp,W); } W.Scale(oneOverTheta); Y.Update(1.0, W, 0.0); } else #endif if (nVec == 1) { double *xPointer = xPtr[0]; for (i = 0; i < len; ++i){ wPointer[i] = invDiag[i] * xPointer[i] * oneOverTheta; } memcpy(yPtr[0], wPointer, len*sizeof(double)); } else { for (i = 0; i < len; ++i) { double coeff = invDiag[i]*oneOverTheta; double *wi = wPointer + i; for (k = 0; k < nVec; ++k) { *wi = xPtr[k][i] * coeff; wi = wi + len; } } for (k = 0; k < nVec; ++k) memcpy(yPtr[k], wPointer + k*len, len*sizeof(double)); } // if (nVec == 1) } // if (ZeroStartingSolution_ == false) //--- Apply the polynomial double rhok = 1.0/s1, rhokp1; double dtemp1, dtemp2; int degreeMinusOne = PolyDegree_ - 1; if (nVec == 1) { double *xPointer = xPtr[0]; for (k = 0; k < degreeMinusOne; ++k) { Operator_->Apply(Y, V); rhokp1 = 1.0 / (2.0*s1 - rhok); dtemp1 = rhokp1 * rhok; dtemp2 = 2.0 * rhokp1 * delta; rhok = rhokp1; // Compute W = dtemp1 * W W.Scale(dtemp1); // Compute W = W + dtemp2 * invDiag * ( X - V ) #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) { //NTS: We can clobber V since it will be reset in the Apply V.Update(dtemp2,X,-dtemp2); IBD->ApplyInverse(V,Temp); // Perform additional matvecs for normal equations // CMS: Testing this only in block mode FOR NOW if(SolveNormalEquations_){ IBD->ApplyInverse(V,Temp); Apply_Transpose(Operator_,Temp,V); } W.Update(1.0,Temp,1.0); } else{ #endif for (i = 0; i < len; ++i) wPointer[i] += dtemp2* invDiag[i] * (xPointer[i] - vPointer[i]); #ifdef HAVE_IFPACK_EPETRAEXT } #endif // Update the vector Y Y.Update(1.0, W, 1.0); } // for (k = 0; k < degreeMinusOne; ++k) } else { for (k = 0; k < degreeMinusOne; ++k) { Operator_->Apply(Y, V); rhokp1 = 1.0 / (2.0*s1 - rhok); dtemp1 = rhokp1 * rhok; dtemp2 = 2.0 * rhokp1 * delta; rhok = rhokp1; // Compute W = dtemp1 * W W.Scale(dtemp1); // Compute W = W + dtemp2 * invDiag * ( X - V ) #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) { //We can clobber V since it will be reset in the Apply V.Update(dtemp2,X,-dtemp2); IBD->ApplyInverse(V,Temp); // Perform additional matvecs for normal equations // CMS: Testing this only in block mode FOR NOW if(SolveNormalEquations_){ IBD->ApplyInverse(V,Temp); Apply_Transpose(Operator_,Temp,V); } W.Update(1.0,Temp,1.0); } else{ #endif for (i = 0; i < len; ++i) { double coeff = invDiag[i]*dtemp2; double *wi = wPointer + i, *vi = vPointer + i; for (j = 0; j < nVec; ++j) { *wi += (xPtr[j][i] - (*vi)) * coeff; wi = wi + len; vi = vi + len; } } #ifdef HAVE_IFPACK_EPETRAEXT } #endif // Update the vector Y Y.Update(1.0, W, 1.0); } // for (k = 0; k < degreeMinusOne; ++k) } // if (nVec == 1) // Flops are updated in each of the following. ++NumApplyInverse_; ApplyInverseTime_ += Time_->ElapsedTime(); return(0); }
//======================================================= int EpetraExt_HypreIJMatrix::Solve(bool Upper, bool transpose, bool UnitDiagonal, const Epetra_MultiVector & X, Epetra_MultiVector & Y) const { bool SameVectors = false; int NumVectors = X.NumVectors(); if (NumVectors != Y.NumVectors()) return -1; // X and Y must have same number of vectors if(X.Pointers() == Y.Pointers()){ SameVectors = true; } if(SolveOrPrec_ == Solver){ if(IsSolverSetup_[0] == false){ SetupSolver(); } } else { if(IsPrecondSetup_[0] == false){ SetupPrecond(); } } for(int VecNum = 0; VecNum < NumVectors; VecNum++) { //Get values for current vector in multivector. double * x_values; EPETRA_CHK_ERR((*X(VecNum)).ExtractView(&x_values)); double * y_values; if(!SameVectors){ EPETRA_CHK_ERR((*Y(VecNum)).ExtractView(&y_values)); } else { y_values = new double[X.MyLength()]; } // Temporarily make a pointer to data in Hypre for end double *x_temp = x_local->data; // Replace data in Hypre vectors with epetra values x_local->data = x_values; double *y_temp = y_local->data; y_local->data = y_values; EPETRA_CHK_ERR(HYPRE_ParVectorSetConstantValues(par_y, 0.0)); if(transpose && !TransposeSolve_){ // User requested a transpose solve, but the solver selected doesn't provide one EPETRA_CHK_ERR(-1); } if(SolveOrPrec_ == Solver){ // Use the solver methods EPETRA_CHK_ERR(SolverSolvePtr_(Solver_, ParMatrix_, par_x, par_y)); } else { // Apply the preconditioner EPETRA_CHK_ERR(PrecondSolvePtr_(Preconditioner_, ParMatrix_, par_x, par_y)); } if(SameVectors){ int NumEntries = Y.MyLength(); std::vector<double> new_values; new_values.resize(NumEntries); std::vector<int> new_indices; new_indices.resize(NumEntries); for(int i = 0; i < NumEntries; i++){ new_values[i] = y_values[i]; new_indices[i] = i; } EPETRA_CHK_ERR((*Y(VecNum)).ReplaceMyValues(NumEntries, &new_values[0], &new_indices[0])); delete[] y_values; } x_local->data = x_temp; y_local->data = y_temp; } double flops = (double) NumVectors * (double) NumGlobalNonzeros(); UpdateFlops(flops); return 0; } //Solve()
int CopyMultiVector(double** matlabApr, const Epetra_MultiVector& A) { Epetra_BlockMap bmap = A.Map(); const Epetra_Comm & comm = bmap.Comm(); int numProc = comm.NumProc(); if (numProc==1) DoCopyMultiVector(matlabApr, A); else { // In the case of more than one column in the multivector, and writing to MatrixMarket // format, we call this function recursively, passing each vector of the multivector // individually so that we can get all of it written to file before going on to the next // multivector if (A.NumVectors() > 1) { for (int i=0; i < A.NumVectors(); i++) if (CopyMultiVector(matlabApr, *(A(i)))) return(-1); return(0); } Epetra_Map map(-1, bmap.NumMyPoints(), 0, comm); // Create a veiw of this multivector using a map (instead of block map) Epetra_MultiVector A1(View, map, A.Pointers(), A.NumVectors()); int numRows = map.NumMyElements(); Epetra_Map allGidsMap(-1, numRows, 0,comm); Epetra_IntVector allGids(allGidsMap); for (int i=0; i<numRows; i++) allGids[i] = map.GID(i); // Now construct a MultiVector on PE 0 by strip-mining the rows of the input matrix A. int numChunks = numProc; int stripSize = allGids.GlobalLength()/numChunks; int remainder = allGids.GlobalLength()%numChunks; int curStart = 0; int curStripSize = 0; Epetra_IntSerialDenseVector importGidList; int numImportGids = 0; if (comm.MyPID()==0) importGidList.Size(stripSize+1); // Set size of vector to max needed for (int i=0; i<numChunks; i++) { if (comm.MyPID()==0) { // Only PE 0 does this part curStripSize = stripSize; if (i<remainder) curStripSize++; // handle leftovers for (int j=0; j<curStripSize; j++) importGidList[j] = j + curStart; curStart += curStripSize; } // The following import map will be non-trivial only on PE 0. Epetra_Map importGidMap(-1, curStripSize, importGidList.Values(), 0, comm); Epetra_Import gidImporter(importGidMap, allGidsMap); Epetra_IntVector importGids(importGidMap); if (importGids.Import(allGids, gidImporter, Insert)) return(-1); // importGids now has a list of GIDs for the current strip of matrix rows. // Use these values to build another importer that will get rows of the matrix. // The following import map will be non-trivial only on PE 0. Epetra_Map importMap(-1, importGids.MyLength(), importGids.Values(), 0, comm); Epetra_Import importer(importMap, map); Epetra_MultiVector importA(importMap, A1.NumVectors()); if (importA.Import(A1, importer, Insert)) return(-1); // Finally we are ready to write this strip of the matrix to ostream if (DoCopyMultiVector(matlabApr, importA)) return(-1); } } return(0); }