//============================================================================= int Epetra_MsrMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { (void)TransA; int NumVectors = X.NumVectors(); if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1); // X and Y must have same number of vectors double ** xptrs; double ** yptrs; X.ExtractView(&xptrs); Y.ExtractView(&yptrs); if (RowMatrixImporter()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors); // Create import vector if needed ImportVector_->Import(X, *RowMatrixImporter(), Insert); ImportVector_->ExtractView(&xptrs); } for (int i=0; i<NumVectors; i++) Amat_->matvec(xptrs[i], yptrs[i], Amat_, proc_config_); double flops = NumGlobalNonzeros(); flops *= 2.0; flops *= (double) NumVectors; UpdateFlops(flops); return(0); }
int Epetra_PETScAIJMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { (void)TransA; int NumVectors = X.NumVectors(); if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1); // X and Y must have same number of vectors double ** xptrs; double ** yptrs; X.ExtractView(&xptrs); Y.ExtractView(&yptrs); if (RowMatrixImporter()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors); ImportVector_->Import(X, *RowMatrixImporter(), Insert); ImportVector_->ExtractView(&xptrs); } double *vals=0; int length; Vec petscX, petscY; int ierr; for (int i=0; i<NumVectors; i++) { # ifdef HAVE_MPI ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr); ierr=VecCreateMPIWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr); # else //FIXME untested ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr); ierr=VecCreateSeqWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr); # endif ierr = MatMult(Amat_,petscX,petscY);CHKERRQ(ierr); ierr = VecGetArray(petscY,&vals);CHKERRQ(ierr); ierr = VecGetLocalSize(petscY,&length);CHKERRQ(ierr); for (int j=0; j<length; j++) yptrs[i][j] = vals[j]; ierr = VecRestoreArray(petscY,&vals);CHKERRQ(ierr); } VecDestroy(petscX); VecDestroy(petscY); double flops = NumGlobalNonzeros(); flops *= 2.0; flops *= (double) NumVectors; UpdateFlops(flops); return(0); } //Multiply()
Epetra_OskiMultiVector::Epetra_OskiMultiVector(const Epetra_MultiVector& Source) : Epetra_MultiVector(Source), Epetra_View_(&Source), Copy_Created_(false) { double* A; double** Aptr; int LDA; int* LDAptr; LDAptr = new int[1]; Aptr = new double*[1]; if(Source.ConstantStride() || (Source.NumVectors() == 1)) { if(Source.ExtractView(Aptr, LDAptr)) std::cerr << "Extract view failed\n"; else Oski_View_ = oski_CreateMultiVecView(*Aptr, Source.MyLength(), Source.NumVectors(), LAYOUT_COLMAJ, *LDAptr); } else { Copy_Created_ = true; LDA = Source.MyLength(); A = new double[LDA*Source.NumVectors()]; if(Source.ExtractCopy(A, LDA)) std::cerr << "Extract copy failed\n"; else Oski_View_ = oski_CreateMultiVecView(A, Source.MyLength(), Source.NumVectors(), LAYOUT_COLMAJ, LDA); } delete [] LDAptr; delete [] Aptr; }
//EpetraMultiVector_To_TpetraMultiVector: copies Epetra_MultiVector to non-const Tpetra_MultiVector Teuchos::RCP<Tpetra_MultiVector> Petra::EpetraMultiVector_To_TpetraMultiVector(const Epetra_MultiVector& epetraMV_, const Teuchos::RCP<const Teuchos::Comm<int> >& commT_) { //get map from epetraMV_ and convert to Tpetra::Map auto mapT = EpetraMap_To_TpetraMap(epetraMV_.Map(), commT_); //copy values from epetraMV_ int numVectors = epetraMV_.NumVectors(); int Length; ST *values; epetraMV_.ExtractView(&values, &Length); Teuchos::ArrayView<ST> valuesAV = Teuchos::arrayView(values, Length*numVectors); //create Tpetra_MultiVector copy of epetraMV_ Teuchos::RCP<Tpetra_MultiVector> tpetraMV_ = Teuchos::rcp(new Tpetra_MultiVector(mapT, valuesAV, Length, numVectors)); return tpetraMV_; }
// Convert a Epetra_MultiVector with assumed block structure dictated by the // vector space into a Thyra::MultiVectorBase object. // const Teuchos::RCP<const Thyra::MultiVectorBase<double> > blockEpetraToThyra(const Epetra_MultiVector & e,const Teuchos::RCP<const Thyra::VectorSpaceBase<double> > & vs) void blockEpetraToThyra(const Epetra_MultiVector & epetraX,const Teuchos::Ptr<Thyra::MultiVectorBase<double> > & thyraX) { TEUCHOS_ASSERT(thyraX->range()->dim()==epetraX.GlobalLength()); // extract local information from the Epetra_MultiVector int leadingDim=0,numVectors=0,localDim=0; double * epetraData=0; epetraX.ExtractView(&epetraData,&leadingDim); numVectors = epetraX.NumVectors(); blockEpetraToThyra(numVectors,epetraData,leadingDim,thyraX.ptr(),localDim); TEUCHOS_ASSERT(localDim==epetraX.MyLength()); }
// Convert a Thyra::MultiVectorBase object to a Epetra_MultiVector object with // the map defined by the Epetra_Map. // const Teuchos::RCP<const Epetra_MultiVector> // blockThyraToEpetra(const Teuchos::RCP<const Thyra::MultiVectorBase<double> > & tX,const RCP<const Epetra_Map> & map) void blockThyraToEpetra(const Teuchos::RCP<const Thyra::MultiVectorBase<double> > & thyraX,Epetra_MultiVector & epetraX) { // build an Epetra_MultiVector object int numVectors = thyraX->domain()->dim(); // make sure the number of vectors are the same TEUCHOS_ASSERT(numVectors==epetraX.NumVectors()); TEUCHOS_ASSERT(thyraX->range()->dim()==epetraX.GlobalLength()); // extract local information from the Epetra_MultiVector int leadingDim=0,localDim=0; double * epetraData=0; epetraX.ExtractView(&epetraData,&leadingDim); // perform recursive copy blockThyraToEpetra(numVectors,epetraData,leadingDim,thyraX,localDim); // sanity check TEUCHOS_ASSERT(localDim==epetraX.Map().NumMyElements()); }
//============================================================================== int Ifpack_PointRelaxation:: ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { int NumVectors = X.NumVectors(); int Length = Matrix().MaxNumEntries(); std::vector<int> Indices(Length); std::vector<double> Values(Length); Teuchos::RefCountPtr< Epetra_MultiVector > Y2; if (IsParallel_) Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); else Y2 = Teuchos::rcp( &Y, false ); // extract views (for nicer and faster code) double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; X.ExtractView(&x_ptr); Y.ExtractView(&y_ptr); Y2->ExtractView(&y2_ptr); Diagonal_->ExtractView(&d_ptr); for (int j = 0; j < NumSweeps_ ; j++) { // data exchange is here, once per sweep if (IsParallel_) IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); // FIXME: do I really need this code below? if (NumVectors == 1) { double* y0_ptr = y_ptr[0]; double* y20_ptr = y2_ptr[0]; double* x0_ptr = x_ptr[0]; if(!DoBackwardGS_){ /* Forward Mode */ for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; int NumEntries; int col; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y20_ptr[col]; } y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp); } } else { /* Backward Mode */ for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; int NumEntries; int col; (void) col; // Forestall compiler warning for unused variable. IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y20_ptr[i]; } y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp); } } // using Export() sounded quite expensive if (IsParallel_) for (int i = 0 ; i < NumMyRows_ ; ++i) y0_ptr[i] = y20_ptr[i]; } else { if(!DoBackwardGS_){ /* Forward Mode */ for (int i = 0 ; i < NumMyRows_ ; ++i) { int NumEntries; int col; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp); } } } else { /* Backward Mode */ for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { int NumEntries; int col; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp); } } } // using Export() sounded quite expensive if (IsParallel_) for (int m = 0 ; m < NumVectors ; ++m) for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; y_ptr[m][i] = y2_ptr[m][i]; } } } #ifdef IFPACK_FLOPCOUNTERS ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_); #endif return(0); } //ApplyInverseGS_RowMatrix()
int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{ if(!IsComputed_) return -1; Time_.ResetStartTime(); bool initial_guess_is_zero=false; const int lclNumRows = W_->NumMyRows(); const int NumVectors = X.NumVectors(); Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors); double omega=GetOmega(); // need to create an auxiliary vector, Xcopy Teuchos::RCP<const Epetra_MultiVector> Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]){ Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); // Since the user didn't give us anything better, our initial guess is zero. Y.Scale(0.0); initial_guess_is_zero=true; } else Xcopy = Teuchos::rcp( &X, false ); Teuchos::RCP< Epetra_MultiVector > T2; // Note: Assuming that the matrix has an importer. Ifpack_PointRelaxation doesn't do this, but given that // I have a CrsMatrix, I'm probably OK. // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine // if things are on or off processor. // Note: T2 must be zero'd out if (IsParallel_ && W_->Importer()) T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true)); else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true)); // Pointer grabs int* rowptr,*colind; double *values; double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr; T2->ExtractView(&t2_ptr); Y.ExtractView(&y_ptr); Temp.ExtractView(&t_ptr); Xcopy->ExtractView(&x_ptr); Wdiag_->ExtractView(&d_ptr); IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values)); for(int i=0; i<NumSweeps_; i++){ // Calculate b-Ax if(!initial_guess_is_zero || i > 0) { A_->Apply(Y,Temp); Temp.Update(1.0,*Xcopy,-1.0); } else Temp.Update(1.0,*Xcopy,0.0); // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated // in this sweep before they are used, so we don't need to reset T2 to zero here. // Do backsolve & update // x = x + W^{-1} (b - A x) for(int j=0; j<lclNumRows; j++){ double diag=d_ptr[j]; for (int m=0 ; m<NumVectors; m++) { double dtmp=0.0; // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute. t2_ptr[m][j]=0.0; for(int k=rowptr[j];k<rowptr[j+1];k++){ dtmp+= values[k]*t2_ptr[m][colind[k]]; } // Yes, we need to update both of these. t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag; y_ptr[m][j] += omega*t2_ptr[m][j]; } } } // Counter update NumApplyInverse_++; ApplyInverseTime_ += Time_.ElapsedTime(); return 0; }
//============================================================================== int Ifpack_Chebyshev:: ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { if (!IsComputed()) IFPACK_CHK_ERR(-3); if (PolyDegree_ == 0) return 0; int nVec = X.NumVectors(); int len = X.MyLength(); if (nVec != Y.NumVectors()) IFPACK_CHK_ERR(-2); Time_->ResetStartTime(); // AztecOO gives X and Y pointing to the same memory location, // need to create an auxiliary vector, Xcopy Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]) Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); else Xcopy = Teuchos::rcp( &X, false ); double **xPtr = 0, **yPtr = 0; Xcopy->ExtractView(&xPtr); Y.ExtractView(&yPtr); #ifdef HAVE_IFPACK_EPETRAEXT EpetraExt_PointToBlockDiagPermute* IBD=0; if (UseBlockMode_) IBD=&*InvBlockDiagonal_; #endif //--- Do a quick solve when the matrix is identity double *invDiag=0; if(!UseBlockMode_) invDiag=InvDiagonal_->Values(); if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_)) { #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) IBD->ApplyInverse(*Xcopy,Y); else #endif if (nVec == 1) { double *yPointer = yPtr[0], *xPointer = xPtr[0]; for (int i = 0; i < len; ++i) yPointer[i] = xPointer[i]*invDiag[i]; } else { int i, k; for (i = 0; i < len; ++i) { double coeff = invDiag[i]; for (k = 0; k < nVec; ++k) yPtr[k][i] = xPtr[k][i] * coeff; } } // if (nVec == 1) return 0; } // if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_)) //--- Initialize coefficients // Note that delta stores the inverse of ML_Cheby::delta double alpha = LambdaMax_ / EigRatio_; double beta = 1.1 * LambdaMax_; double delta = 2.0 / (beta - alpha); double theta = 0.5 * (beta + alpha); double s1 = theta * delta; //--- Define vectors // In ML_Cheby, V corresponds to pAux and W to dk Epetra_MultiVector V(X); Epetra_MultiVector W(X); #ifdef HAVE_IFPACK_EPETRAEXT Epetra_MultiVector Temp(X); #endif double *vPointer = V.Values(), *wPointer = W.Values(); double oneOverTheta = 1.0/theta; int i, j, k; //--- If solving normal equations, multiply RHS by A^T if(SolveNormalEquations_){ Apply_Transpose(Operator_,Y,V); Y=V; } // Do the smoothing when block scaling is turned OFF // --- Treat the initial guess if (ZeroStartingSolution_ == false) { Operator_->Apply(Y, V); // Compute W = invDiag * ( X - V )/ Theta #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) { Temp.Update(oneOverTheta,X,-oneOverTheta,V,0.0); IBD->ApplyInverse(Temp,W); // Perform additional matvecs for normal equations // CMS: Testing this only in block mode FOR NOW if(SolveNormalEquations_){ IBD->ApplyInverse(W,Temp); Apply_Transpose(Operator_,Temp,W); } } else #endif if (nVec == 1) { double *xPointer = xPtr[0]; for (i = 0; i < len; ++i) wPointer[i] = invDiag[i] * (xPointer[i] - vPointer[i]) * oneOverTheta; } else { for (i = 0; i < len; ++i) { double coeff = invDiag[i]*oneOverTheta; double *wi = wPointer + i, *vi = vPointer + i; for (k = 0; k < nVec; ++k) { *wi = (xPtr[k][i] - (*vi)) * coeff; wi = wi + len; vi = vi + len; } } } // if (nVec == 1) // Update the vector Y Y.Update(1.0, W, 1.0); } else { // Compute W = invDiag * X / Theta #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) { IBD->ApplyInverse(X,W); // Perform additional matvecs for normal equations // CMS: Testing this only in block mode FOR NOW if(SolveNormalEquations_){ IBD->ApplyInverse(W,Temp); Apply_Transpose(Operator_,Temp,W); } W.Scale(oneOverTheta); Y.Update(1.0, W, 0.0); } else #endif if (nVec == 1) { double *xPointer = xPtr[0]; for (i = 0; i < len; ++i){ wPointer[i] = invDiag[i] * xPointer[i] * oneOverTheta; } memcpy(yPtr[0], wPointer, len*sizeof(double)); } else { for (i = 0; i < len; ++i) { double coeff = invDiag[i]*oneOverTheta; double *wi = wPointer + i; for (k = 0; k < nVec; ++k) { *wi = xPtr[k][i] * coeff; wi = wi + len; } } for (k = 0; k < nVec; ++k) memcpy(yPtr[k], wPointer + k*len, len*sizeof(double)); } // if (nVec == 1) } // if (ZeroStartingSolution_ == false) //--- Apply the polynomial double rhok = 1.0/s1, rhokp1; double dtemp1, dtemp2; int degreeMinusOne = PolyDegree_ - 1; if (nVec == 1) { double *xPointer = xPtr[0]; for (k = 0; k < degreeMinusOne; ++k) { Operator_->Apply(Y, V); rhokp1 = 1.0 / (2.0*s1 - rhok); dtemp1 = rhokp1 * rhok; dtemp2 = 2.0 * rhokp1 * delta; rhok = rhokp1; // Compute W = dtemp1 * W W.Scale(dtemp1); // Compute W = W + dtemp2 * invDiag * ( X - V ) #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) { //NTS: We can clobber V since it will be reset in the Apply V.Update(dtemp2,X,-dtemp2); IBD->ApplyInverse(V,Temp); // Perform additional matvecs for normal equations // CMS: Testing this only in block mode FOR NOW if(SolveNormalEquations_){ IBD->ApplyInverse(V,Temp); Apply_Transpose(Operator_,Temp,V); } W.Update(1.0,Temp,1.0); } else{ #endif for (i = 0; i < len; ++i) wPointer[i] += dtemp2* invDiag[i] * (xPointer[i] - vPointer[i]); #ifdef HAVE_IFPACK_EPETRAEXT } #endif // Update the vector Y Y.Update(1.0, W, 1.0); } // for (k = 0; k < degreeMinusOne; ++k) } else { for (k = 0; k < degreeMinusOne; ++k) { Operator_->Apply(Y, V); rhokp1 = 1.0 / (2.0*s1 - rhok); dtemp1 = rhokp1 * rhok; dtemp2 = 2.0 * rhokp1 * delta; rhok = rhokp1; // Compute W = dtemp1 * W W.Scale(dtemp1); // Compute W = W + dtemp2 * invDiag * ( X - V ) #ifdef HAVE_IFPACK_EPETRAEXT if(UseBlockMode_) { //We can clobber V since it will be reset in the Apply V.Update(dtemp2,X,-dtemp2); IBD->ApplyInverse(V,Temp); // Perform additional matvecs for normal equations // CMS: Testing this only in block mode FOR NOW if(SolveNormalEquations_){ IBD->ApplyInverse(V,Temp); Apply_Transpose(Operator_,Temp,V); } W.Update(1.0,Temp,1.0); } else{ #endif for (i = 0; i < len; ++i) { double coeff = invDiag[i]*dtemp2; double *wi = wPointer + i, *vi = vPointer + i; for (j = 0; j < nVec; ++j) { *wi += (xPtr[j][i] - (*vi)) * coeff; wi = wi + len; vi = vi + len; } } #ifdef HAVE_IFPACK_EPETRAEXT } #endif // Update the vector Y Y.Update(1.0, W, 1.0); } // for (k = 0; k < degreeMinusOne; ++k) } // if (nVec == 1) // Flops are updated in each of the following. ++NumApplyInverse_; ApplyInverseTime_ += Time_->ElapsedTime(); return(0); }
int Amesos_Scalapack::Solve() { if( debug_ == 1 ) std::cout << "Entering `Solve()'" << std::endl; NumSolve_++; Epetra_MultiVector *vecX = Problem_->GetLHS() ; Epetra_MultiVector *vecB = Problem_->GetRHS() ; // // Compute the number of right hands sides // (and check that X and B have the same shape) // int nrhs; if ( vecX == 0 ) { nrhs = 0 ; EPETRA_CHK_ERR( vecB != 0 ) ; } else { nrhs = vecX->NumVectors() ; EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; } Epetra_MultiVector *ScalapackB =0; Epetra_MultiVector *ScalapackX =0; // // Extract Scalapack versions of X and B // double *ScalapackXvalues ; Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator()); Time_->ResetStartTime(); // track time to broadcast vectors // // Copy B to the scalapack version of B // const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap(); Epetra_MultiVector *ScalapackXextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; Epetra_MultiVector *ScalapackBextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; Epetra_Import ImportToScalapack( *VectorMap_, OriginalMap ); ScalapackBextract->Import( *vecB, ImportToScalapack, Insert ) ; ScalapackB = ScalapackBextract ; ScalapackX = ScalapackXextract ; VecTime_ += Time_->ElapsedTime(); // // Call SCALAPACKs PDGETRS to perform the solve // int DescX[10]; ScalapackX->Scale(1.0, *ScalapackB) ; int ScalapackXlda ; Time_->ResetStartTime(); // tract time to solve // // Setup DescX // if( nrhs > nb_ ) { EPETRA_CHK_ERR( -2 ); } int Ierr[1] ; Ierr[0] = 0 ; const int zero = 0 ; const int one = 1 ; if ( iam_ < nprow_ * npcol_ ) { assert( ScalapackX->ExtractView( &ScalapackXvalues, &ScalapackXlda ) == 0 ) ; if ( false ) std::cout << "Amesos_Scalapack.cpp: " << __LINE__ << " ScalapackXlda = " << ScalapackXlda << " lda_ = " << lda_ << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << " myprow_ = " << myprow_ << " mypcol_ = " << mypcol_ << " iam_ = " << iam_ << std::endl ; if ( TwoD_distribution_ ) assert( mypcol_ >0 || EPETRA_MAX(ScalapackXlda,1) == lda_ ) ; DESCINIT_F77(DescX, &NumGlobalElements_, &nrhs, &nb_, &nb_, &zero, &zero, &ictxt_, &lda_, Ierr ) ; assert( Ierr[0] == 0 ) ; // // For the 1D data distribution, we factor the transposed // matrix, hence we must invert the sense of the transposition // char trans = 'N'; if ( TwoD_distribution_ ) { if ( UseTranspose() ) trans = 'T' ; } else { if ( ! UseTranspose() ) trans = 'T' ; } if ( nprow_ * npcol_ == 1 ) { DGETRS_F77(&trans, &NumGlobalElements_, &nrhs, &DenseA_[0], &lda_, &Ipiv_[0], ScalapackXvalues, &lda_, Ierr ) ; } else { PDGETRS_F77(&trans, &NumGlobalElements_, &nrhs, &DenseA_[0], &one, &one, DescA_, &Ipiv_[0], ScalapackXvalues, &one, &one, DescX, Ierr ) ; } } SolTime_ += Time_->ElapsedTime(); Time_->ResetStartTime(); // track time to broadcast vectors // // Copy X back to the original vector // Epetra_Import ImportFromScalapack( OriginalMap, *VectorMap_ ); vecX->Import( *ScalapackX, ImportFromScalapack, Insert ) ; delete ScalapackBextract ; delete ScalapackXextract ; VecTime_ += Time_->ElapsedTime(); // All processes should return the same error code if ( nprow_ * npcol_ < Comm().NumProc() ) Comm().Broadcast( Ierr, 1, 0 ) ; // MS // compute vector norms if( ComputeVectorNorms_ == true || verbose_ == 2 ) { double NormLHS, NormRHS; for( int i=0 ; i<nrhs ; ++i ) { assert((*vecX)(i)->Norm2(&NormLHS)==0); assert((*vecB)(i)->Norm2(&NormRHS)==0); if( verbose_ && Comm().MyPID() == 0 ) { std::cout << "Amesos_Scalapack : vector " << i << ", ||x|| = " << NormLHS << ", ||b|| = " << NormRHS << std::endl; } } } // MS // compute true residual if( ComputeTrueResidual_ == true || verbose_ == 2 ) { double Norm; Epetra_MultiVector Ax(vecB->Map(),nrhs); for( int i=0 ; i<nrhs ; ++i ) { (Problem_->GetMatrix()->Multiply(UseTranspose(), *((*vecX)(i)), Ax)); (Ax.Update(1.0, *((*vecB)(i)), -1.0)); (Ax.Norm2(&Norm)); if( verbose_ && Comm().MyPID() == 0 ) { std::cout << "Amesos_Scalapack : vector " << i << ", ||Ax - b|| = " << Norm << std::endl; } } } return Ierr[0]; }
int ShyLU_Probing_Operator::Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const { #ifdef TIMING_OUTPUT apply_time_->start(); #endif int nvectors = X.NumVectors(); bool local = (C_->Comm().NumProc() == 1); int err; //cout << "No of colors after probing" << nvectors << endl; #ifdef TIMING_OUTPUT matvec_time_->start(); #endif err = G_->Multiply(false, X, *temp2); assert(err == 0); if (!local) err = C_->Multiply(false, X, *temp); else { // localize X double *values; int mylda; X.ExtractView(&values, &mylda); Epetra_SerialComm LComm; // Use Serial Comm for the local blocks. Epetra_Map SerialMap(X.Map().NumMyElements(), X.Map().NumMyElements(), X.Map().MyGlobalElements(), 0, LComm); Epetra_MultiVector Xl(View, SerialMap, values, mylda, X.NumVectors()); err = C_->Multiply(false, Xl, *temp); } assert(err == 0); #ifdef TIMING_OUTPUT matvec_time_->stop(); #endif int nrows = C_->RowMap().NumMyElements(); #ifdef DEBUG cout << "DEBUG MODE" << endl; assert(nrows == localDRowMap_->NumGlobalElements()); int gids[nrows], gids1[nrows]; C_->RowMap().MyGlobalElements(gids); localDRowMap_->MyGlobalElements(gids1); for (int i = 0; i < nrows; i++) { assert(gids[i] == gids1[i]); } #endif #ifdef TIMING_OUTPUT localize_time_->start(); #endif //int err; int lda; double *values; if (!local) { err = temp->ExtractView(&values, &lda); assert (err == 0); // copy to local vector //TODO: OMP parallel assert(lda == nrows); //#pragma omp parallel for shared(nvectors, nrows, values) for (int v = 0; v < nvectors; v++) { for (int i = 0; i < nrows; i++) { err = ltemp->ReplaceMyValue(i, v, values[i+v*lda]); assert (err == 0); } } } #ifdef TIMING_OUTPUT localize_time_->stop(); trisolve_time_->start(); #endif if (!local) { LP_->SetRHS(ltemp.getRawPtr()); } else { //LP_->SetRHS(temp.getRawPtr()); } //LP_->SetLHS(localX.getRawPtr()); //TODO: Why not just in Reset(). Check the distr path. ssym_->OrigLP->SetLHS(localX.getRawPtr()); ssym_->OrigLP->SetRHS(temp.getRawPtr()); ssym_->ReIdx_LP->fwd(); solver_->Solve(); #ifdef TIMING_OUTPUT trisolve_time_->stop(); dist_time_->start(); #endif if (!local) { err = localX->ExtractView(&values, &lda); assert (err == 0); //Copy back to dist vector //TODO: OMP parallel //#pragma omp parallel for for (int v = 0; v < nvectors; v++) { for (int i = 0; i < nrows; i++) { err = temp->ReplaceMyValue(i, v, values[i+v*lda]); assert (err == 0); } } } #ifdef TIMING_OUTPUT dist_time_->stop(); matvec2_time_->start(); #endif if (!local) { R_->Multiply(false, *temp, Y); } else { // Should Y be localY in Multiply and then exported to Y ?? TODO: // Use view mode ? double *values; int mylda; Y.ExtractView(&values, &mylda); Epetra_SerialComm LComm; // Use Serial Comm for the local blocks. Epetra_Map SerialMap(Y.Map().NumMyElements(), Y.Map().NumMyElements(), Y.Map().MyGlobalElements(), 0, LComm); Epetra_MultiVector Yl(View, SerialMap, values, mylda, Y.NumVectors()); R_->Multiply(false, *localX, Yl); } #ifdef TIMING_OUTPUT matvec2_time_->stop(); update_time_->start(); #endif err = Y.Update(1.0, *temp2, -1.0); //cout << Y.MyLength() << " " << temp2.MyLength() << endl; assert(err == 0); #ifdef TIMING_OUTPUT update_time_->stop(); apply_time_->stop(); #endif cntApply++; return 0; }
int BuildMultiVectorTests (Epetra_MultiVector & C, const double alpha, Epetra_MultiVector& A, Epetra_MultiVector& sqrtA, Epetra_MultiVector& B, Epetra_MultiVector& C_alphaA, Epetra_MultiVector& C_alphaAplusB, Epetra_MultiVector& C_plusB, double* const dotvec_AB, double* const norm1_A, double* const norm2_sqrtA, double* const norminf_A, double* const normw_A, Epetra_MultiVector& Weights, double* const minval_A, double* const maxval_A, double* const meanval_A ) { // For given values alpha and a (possibly zero) filled // Epetra_MultiVector (the this object), allocated double * arguments dotvec_AB, // norm1_A, and norm2_A, and allocated Epetra_MultiVectors A, sqrtA, // B, C_alpha, C_alphaAplusB and C_plusB, this method will generate values for // Epetra_MultiVectors A, B and all of the additional arguments on // the list above such that, if A, B and (this) are used with the methods in // this class, the results should match the results generated by this routine. // Specifically, the results in dotvec_AB should match those from a call to // A.dotProd (B,dotvec). Similarly for other routines. int i,j; double fi, fj; // Used for casting loop variables to floats // Define some useful constants int A_nrows = A.MyLength(); int A_ncols = A.NumVectors(); int sqrtA_nrows = sqrtA.MyLength(); int sqrtA_ncols = sqrtA.NumVectors(); int B_nrows = B.MyLength(); int B_ncols = B.NumVectors(); double **Ap = 0; double **sqrtAp = 0; double **Bp = 0; double **Cp = 0; double **C_alphaAp = 0; double **C_alphaAplusBp = 0; double **C_plusBp = 0; double **Weightsp = 0; A.ExtractView(&Ap); sqrtA.ExtractView(&sqrtAp); B.ExtractView(&Bp); C.ExtractView(&Cp); C_alphaA.ExtractView(&C_alphaAp); C_alphaAplusB.ExtractView(&C_alphaAplusBp); C_plusB.ExtractView(&C_plusBp); Weights.ExtractView(&Weightsp); bool A_is_local = (A.MyLength() == A.GlobalLength()); bool B_is_local = (B.MyLength() == B.GlobalLength()); bool C_is_local = (C.MyLength() == C.GlobalLength()); int A_IndexBase = A.Map().IndexBase(); int B_IndexBase = B.Map().IndexBase(); // Build two new maps that we can use for defining global equation indices below Epetra_Map * A_Map = new Epetra_Map(-1, A_nrows, A_IndexBase, A.Map().Comm()); Epetra_Map * B_Map = new Epetra_Map(-1, B_nrows, B_IndexBase, B.Map().Comm()); int* A_MyGlobalElements = new int[A_nrows]; A_Map->MyGlobalElements(A_MyGlobalElements); int* B_MyGlobalElements = new int[B_nrows]; B_Map->MyGlobalElements(B_MyGlobalElements); // Check for compatible dimensions if (C.MyLength() != A_nrows || A_nrows != B_nrows || C.NumVectors() != A_ncols || A_ncols != B_ncols || sqrtA_nrows != A_nrows || sqrtA_ncols != A_ncols || C.MyLength() != C_alphaA.MyLength() || C.NumVectors() != C_alphaA.NumVectors() || C.MyLength() != C_alphaAplusB.MyLength() || C.NumVectors() != C_alphaAplusB.NumVectors() || C.MyLength() != C_plusB.MyLength() || C.NumVectors() != C_plusB.NumVectors() ) return(-2); // Return error bool Case1 = ( A_is_local && B_is_local && C_is_local); // Case 1 bool Case2 = (!A_is_local && !B_is_local && !C_is_local);// Case 2 // Test for meaningful cases if (!(Case1 || Case2)) return(-3); // Meaningless case /* Fill A and B with values as follows: If A_is_local is false: A(i,j) = A_MyGlobalElements[i]*j, i=1,...,numLocalEquations, j=1,...,NumVectors else A(i,j) = i*j, i=1,...,numLocalEquations, j=1,...,NumVectors If B_is_local is false: B(i,j) = 1/(A_MyGlobalElements[i]*j), i=1,...,numLocalEquations, j=1,...,NumVectors else B(i,j) = 1/(i*j), i=1,...,numLocalEquations, j=1,...,NumVectors In addition, scale each entry by GlobalLength for A and 1/GlobalLength for B--keeps the magnitude of entries in check */ //Define scale factor double sf = A.GlobalLength(); double sfinv = 1.0/sf; // Define A if (A_is_local) { for (j = 0; j <A_ncols ; j++) { for (i = 0; i<A_nrows; i++) { fi = i+1; // Get float version of i and j, offset by 1. fj = j+1; Ap[j][i] = (fi*sfinv)*fj; sqrtAp[j][i] = std::sqrt(Ap[j][i]); } } } else { for (j = 0; j <A_ncols ; j++) { for (i = 0; i<A_nrows; i++) { fi = A_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1. fj = j+1; Ap[j][i] = (fi*sfinv)*fj; sqrtAp[j][i] = std::sqrt(Ap[j][i]); } } } // Define B depending on TransB and B_is_local if (B_is_local) { for (j = 0; j <B_ncols ; j++) { for (i = 0; i<B_nrows; i++) { fi = i+1; // Get float version of i and j, offset by 1. fj = j+1; Bp[j][i] = 1.0/((fi*sfinv)*fj); } } } else { for (j = 0; j <B_ncols ; j++) { for (i = 0; i<B_nrows; i++) { fi = B_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1. fj = j+1; Bp[j][i] = 1.0/((fi*sfinv)*fj); } } } // Generate C_alphaA = alpha * A for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) C_alphaAp[j][i] = alpha * Ap[j][i]; // Generate C_alphaA = alpha * A + B for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) C_alphaAplusBp[j][i] = alpha * Ap[j][i] + Bp[j][i]; // Generate C_plusB = this + B for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) C_plusBp[j][i] = Cp[j][i] + Bp[j][i]; // Generate dotvec_AB. Because B(i,j) = 1/A(i,j), dotvec[i] = C.GlobalLength() for (i=0; i< A.NumVectors(); i++) dotvec_AB[i] = C.GlobalLength(); // For the next two results we want to be careful how we do arithmetic // to avoid very large numbers. // We are computing sfinv*(C.GlobalLength()*(C.GlobalLength()+1)/2) double result = C.GlobalLength(); result *= sfinv; result /= 2.0; result *= (double)(C.GlobalLength()+1); // Generate norm1_A. Can use formula for sum of first n integers. for (i=0; i< A.NumVectors(); i++) // m1_A[i] = (i+1)*C.GlobalLength()*(C.GlobalLength()+1)/2; norm1_A[i] = result * ((double) (i+1)); // Generate norm2_sqrtA. Can use formula for sum of first n integers. for (i=0; i< A.NumVectors(); i++) // norm2_sqrtA[i] = std::sqrt((double) ((i+1)*C.GlobalLength()*(C.GlobalLength()+1)/2)); norm2_sqrtA[i] = std::sqrt(result * ((double) (i+1))); // Generate norminf_A, minval_A, maxval_A, meanval_A. for (i=0; i< A.NumVectors(); i++) { norminf_A[i] = (double) (i+1); minval_A[i] = (double) (i+1)/ (double) A.GlobalLength(); maxval_A[i] = (double) (i+1); meanval_A[i] = norm1_A[i]/((double) (A.GlobalLength())); } // Define weights and expected weighted norm for (i=0; i< A.NumVectors(); i++) { double ip1 = (double) i+1; normw_A[i] = ip1; for (j=0; j<A_nrows; j++) Weightsp[i][j] = Ap[i][j]/ip1; } delete A_Map; delete B_Map; delete [] A_MyGlobalElements; delete [] B_MyGlobalElements; return(0); }
int LOCA::Epetra::AugmentedOp::Apply(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { // Get number of vectors int n = Input.NumVectors(); // Check num vectors is correct if (importedInput == NULL || n != importedInput->NumVectors()) globalData->locaErrorCheck->throwError("LOCA::Epetra::AugmentedOp::Apply()" "Must call init() before Apply()!"); // Import parameter components importedInput->Import(Input, *extendedImporter, Insert); // get views double **input_view; double **result_view; importedInput->ExtractView(&input_view); Result.ExtractView(&result_view); // get views of paramter components // this is done by setting the pointer for each column to start at // underlyingLength double **input_view_y = new double*[n]; for (int i=0; i<n; i++) input_view_y[i] = input_view[i]+underlyingLength; // break input, result into components Epetra_MultiVector input_x(View, underlyingMap, input_view, n); Epetra_MultiVector input_y(View, localMap, input_view_y, n); Epetra_MultiVector result_x(View, underlyingMap, result_view, n); // Compute J*input_x jacOperator->Apply(input_x, result_x); if (useTranspose) { // Compute J^T*input_x + b*input_y result_x.Multiply('N', 'N', 1.0, *b, input_y, 1.0); // Compute a^T*input_x + c^T*input_y result_y->Multiply('T', 'N', 1.0, *a, input_x, 0.0); result_y->Multiply('T', 'N', 1.0, c, input_y, 1.0); } else { // Compute J*input_x + a*input_y result_x.Multiply('N', 'N', 1.0, *a, input_y, 1.0); // Compute b^T*input_x + c*input_y result_y->Multiply('T', 'N', 1.0, *b, input_x, 0.0); result_y->Multiply('N', 'N', 1.0, c, input_y, 1.0); } // Set parameter component if (haveParamComponent) for (int j=0; j<n; j++) for (int i=0; i<numConstraints; i++) result_view[j][underlyingLength+i] = (*result_y)[j][i]; delete [] input_view_y; return 0; }
int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif #define ML_SCALING #ifdef ML_SCALING const int ntimers=4; enum {total, probBuild, precBuild, solve}; ml_DblLoc timeVec[ntimers], maxTime[ntimers], minTime[ntimers]; for (int i=0; i<ntimers; i++) timeVec[i].rank = Comm.MyPID(); timeVec[total].value = MPI_Wtime(); #endif int nx; if (argc > 1) nx = (int) strtol(argv[1],NULL,10); else nx = 256; if (nx < 1) nx = 256; // input a nonpositive integer if you want to specify // the XML input file name. nx = nx*(int)sqrt((double)Comm.NumProc()); int ny = nx; printf("nx = %d\nny = %d\n",nx,ny); fflush(stdout); char xmlFile[80]; bool readXML=false; if (argc > 2) {strcpy(xmlFile,argv[2]); readXML = true;} else sprintf(xmlFile,"%s","params.xml"); ParameterList GaleriList; GaleriList.set("nx", nx); GaleriList.set("ny", ny); #ifdef ML_SCALING timeVec[probBuild].value = MPI_Wtime(); #endif Epetra_Map* Map = CreateMap("Cartesian2D", Comm, GaleriList); Epetra_CrsMatrix* A = CreateCrsMatrix("Laplace2D", Map, GaleriList); if (!Comm.MyPID()) printf("nx = %d, ny = %d, mx = %d, my = %d\n",nx,ny,GaleriList.get("mx",-1),GaleriList.get("my",-1)); fflush(stdout); //avoid potential overflow double numMyRows = A->NumMyRows(); double numGlobalRows; Comm.SumAll(&numMyRows,&numGlobalRows,1); if (!Comm.MyPID()) printf("# global rows = %1.0f\n",numGlobalRows); //printf("pid %d: #rows = %d\n",Comm.MyPID(),A->NumMyRows()); fflush(stdout); Epetra_MultiVector *coords = CreateCartesianCoordinates("2D", Map,GaleriList); double *x_coord=0,*y_coord=0,*z_coord=0; double **ttt; if (!coords->ExtractView(&ttt)) { x_coord = ttt[0]; y_coord = ttt[1]; } else { if (!Comm.MyPID()) printf("Error extracting coordinate vectors\n"); MPI_Finalize(); exit(EXIT_FAILURE); } Epetra_Vector LHS(*Map); LHS.Random(); Epetra_Vector RHS(*Map); RHS.PutScalar(0.0); Epetra_LinearProblem Problem(A, &LHS, &RHS); AztecOO solver(Problem); #ifdef ML_SCALING timeVec[probBuild].value = MPI_Wtime() - timeVec[probBuild].value; #endif // =========================== begin of ML part =========================== #ifdef ML_SCALING timeVec[precBuild].value = MPI_Wtime(); #endif ParameterList MLList; if (readXML) { MLList.set("read XML",true); MLList.set("XML input file",xmlFile); } else { cout << "here" << endl; ML_Epetra::SetDefaults("SA",MLList); MLList.set("smoother: type","Chebyshev"); MLList.set("smoother: sweeps",3); MLList.set("coarse: max size",1); } MLList.set("x-coordinates", x_coord); MLList.set("y-coordinates", y_coord); MLList.set("z-coordinates", z_coord); /* RCP<std::vector<int> > m_smootherAztecOptions = rcp(new std::vector<int>(AZ_OPTIONS_SIZE)); RCP<std::vector<double> > m_smootherAztecParams = rcp(new std::vector<double>(AZ_PARAMS_SIZE)); //int m_smootherAztecOptions[AZ_OPTIONS_SIZE]; //double m_smootherAztecParams[AZ_PARAMS_SIZE]; std::string smootherType("Aztec"); AZ_defaults(&(*m_smootherAztecOptions)[0],&(*m_smootherAztecParams)[0]); (*m_smootherAztecOptions)[AZ_precond] = AZ_dom_decomp; (*m_smootherAztecOptions)[AZ_subdomain_solve] = AZ_icc; bool smootherAztecAsSolver = true; */ MLList.set("ML output",10); MLList.set("repartition: enable",1); MLList.set("repartition: max min ratio",1.3); MLList.set("repartition: min per proc",200); MLList.set("repartition: partitioner","Zoltan"); MLList.set("repartition: Zoltan dimensions",2); MLList.set("repartition: put on single proc",1); MLList.set("repartition: Zoltan type","hypergraph"); MLList.set("repartition: estimated iterations",13); /* MLList.set("smoother: Aztec options",m_smootherAztecOptions); MLList.set("smoother: Aztec params",m_smootherAztecParams); MLList.set("smoother: type",smootherType.c_str()); MLList.set("smoother: Aztec as solver", smootherAztecAsSolver); MLList.set("ML print initial list", 0); MLList.set("ML print final list", 0); */ ML_Epetra::MultiLevelPreconditioner* MLPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList); // verify unused parameters on process 0 (put -1 to print on all // processes) MLPrec->PrintUnused(0); #ifdef ML_SCALING timeVec[precBuild].value = MPI_Wtime() - timeVec[precBuild].value; #endif // =========================== end of ML part ============================= #ifdef ML_SCALING timeVec[solve].value = MPI_Wtime(); #endif solver.SetPrecOperator(MLPrec); solver.SetAztecOption(AZ_solver, AZ_cg); solver.SetAztecOption(AZ_output, 1); solver.Iterate(500, 1e-12); #ifdef ML_SCALING timeVec[solve].value = MPI_Wtime() - timeVec[solve].value; #endif // destroy the preconditioner delete MLPrec; // compute the real residual double residual; LHS.Norm2(&residual); if( Comm.MyPID()==0 ) { cout << "||b-Ax||_2 = " << residual << endl; } // for testing purposes if (residual > 1e-5) exit(EXIT_FAILURE); delete A; delete Map; delete coords; #ifdef ML_SCALING timeVec[total].value = MPI_Wtime() - timeVec[total].value; //avg double dupTime[ntimers],avgTime[ntimers]; for (int i=0; i<ntimers; i++) dupTime[i] = timeVec[i].value; MPI_Reduce(dupTime,avgTime,ntimers,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); for (int i=0; i<ntimers; i++) avgTime[i] = avgTime[i]/Comm.NumProc(); //min MPI_Reduce(timeVec,minTime,ntimers,MPI_DOUBLE_INT,MPI_MINLOC,0,MPI_COMM_WORLD); //max MPI_Reduce(timeVec,maxTime,ntimers,MPI_DOUBLE_INT,MPI_MAXLOC,0,MPI_COMM_WORLD); if (Comm.MyPID() == 0) { printf("timing : max (pid) min (pid) avg\n"); printf("Problem build : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[probBuild].value,maxTime[probBuild].rank, minTime[probBuild].value,minTime[probBuild].rank, avgTime[probBuild]); printf("Preconditioner build : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[precBuild].value,maxTime[precBuild].rank, minTime[precBuild].value,minTime[precBuild].rank, avgTime[precBuild]); printf("Solve : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[solve].value,maxTime[solve].rank, minTime[solve].value,minTime[solve].rank, avgTime[solve]); printf("Total : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[total].value,maxTime[total].rank, minTime[total].value,minTime[total].rank, avgTime[total]); } #endif #ifdef HAVE_MPI MPI_Finalize(); #endif return(EXIT_SUCCESS); }
//============================================================================== // this requires Epetra_CrsMatrix + OptimizeStorage() int Ifpack_PointRelaxation:: ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { int* IndexOffset; int* Indices; double* Values; IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values)); int NumVectors = X.NumVectors(); Teuchos::RefCountPtr< Epetra_MultiVector > Y2; if (IsParallel_) { Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); } else Y2 = Teuchos::rcp( &Y, false ); double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; X.ExtractView(&x_ptr); Y.ExtractView(&y_ptr); Y2->ExtractView(&y2_ptr); Diagonal_->ExtractView(&d_ptr); for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { // only one data exchange per sweep if (IsParallel_) IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); for (int i = 0 ; i < NumMyRows_ ; ++i) { int col; double diag = d_ptr[i]; // no need to extract row i //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices)); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { int col; double diag = d_ptr[i]; // no need to extract row i //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices)); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } if (IsParallel_) for (int m = 0 ; m < NumVectors ; ++m) for (int i = 0 ; i < NumMyRows_ ; ++i) y_ptr[m][i] = y2_ptr[m][i]; } ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); return(0); }
int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif int nx; if (argc > 1) nx = (int) strtol(argv[1],NULL,10); else nx = 256; int ny = nx * Comm.NumProc(); // each subdomain is a square ParameterList GaleriList; GaleriList.set("nx", nx); GaleriList.set("ny", ny); GaleriList.set("mx", 1); GaleriList.set("my", Comm.NumProc()); int NumNodes = nx*ny; int NumPDEEqns = 2; Epetra_Map* Map = CreateMap("Cartesian2D", Comm, GaleriList); Epetra_CrsMatrix* CrsA = CreateCrsMatrix("Laplace2D", Map, GaleriList); Epetra_VbrMatrix* A = CreateVbrMatrix(CrsA, NumPDEEqns); Epetra_Vector LHS(A->DomainMap()); LHS.PutScalar(0); Epetra_Vector RHS(A->DomainMap()); RHS.Random(); Epetra_LinearProblem Problem(A, &LHS, &RHS); AztecOO solver(Problem); double *x_coord = 0, *y_coord = 0, *z_coord = 0; Epetra_MultiVector *coords = CreateCartesianCoordinates("2D", &(CrsA->Map()), GaleriList); double **ttt; if (!coords->ExtractView(&ttt)) { x_coord = ttt[0]; y_coord = ttt[1]; } else { printf("Error extracting coordinate vectors\n"); # ifdef HAVE_MPI MPI_Finalize() ; # endif exit(EXIT_FAILURE); } ParameterList MLList; SetDefaults("SA",MLList); MLList.set("ML output",10); MLList.set("max levels",10); MLList.set("increasing or decreasing","increasing"); MLList.set("smoother: type", "Chebyshev"); MLList.set("smoother: sweeps", 3); // *) if a low number, it will use all the available processes // *) if a big number, it will use only processor 0 on the next level MLList.set("aggregation: next-level aggregates per process", 1); MLList.set("aggregation: type (level 0)", "Zoltan"); MLList.set("aggregation: type (level 1)", "Uncoupled"); MLList.set("aggregation: type (level 2)", "Zoltan"); MLList.set("aggregation: smoothing sweeps", 2); MLList.set("x-coordinates", x_coord); MLList.set("y-coordinates", y_coord); MLList.set("z-coordinates", z_coord); // specify the reduction with respect to the previous level // (very small values can break the code) int ratio = 16; MLList.set("aggregation: global aggregates (level 0)", NumNodes / ratio); MLList.set("aggregation: global aggregates (level 1)", NumNodes / (ratio * ratio)); MLList.set("aggregation: global aggregates (level 2)", NumNodes / (ratio * ratio * ratio)); MultiLevelPreconditioner* MLPrec = new MultiLevelPreconditioner(*A, MLList, true); solver.SetPrecOperator(MLPrec); solver.SetAztecOption(AZ_solver, AZ_cg_condnum); solver.SetAztecOption(AZ_output, 1); solver.Iterate(100, 1e-12); // compute the real residual Epetra_Vector Residual(A->DomainMap()); //1.0 * RHS + 0.0 * RHS - 1.0 * (A * LHS) A->Apply(LHS,Residual); Residual.Update(1.0, RHS, 0.0, RHS, -1.0); double rn; Residual.Norm2(&rn); if (Comm.MyPID() == 0 ) std::cout << "||b-Ax||_2 = " << rn << endl; if (Comm.MyPID() == 0 && rn > 1e-5) { std::cout << "TEST FAILED!!!!" << endl; # ifdef HAVE_MPI MPI_Finalize() ; # endif exit(EXIT_FAILURE); } delete MLPrec; delete coords; delete Map; delete CrsA; delete A; if (Comm.MyPID() == 0) std::cout << "TEST PASSED" << endl; #ifdef HAVE_MPI MPI_Finalize() ; #endif exit(EXIT_SUCCESS); }
int BuildMatrixTests (Epetra_MultiVector & C, const char TransA, const char TransB, const double alpha, Epetra_MultiVector& A, Epetra_MultiVector& B, const double beta, Epetra_MultiVector& C_GEMM ) { // For given values of TransA, TransB, alpha and beta, a (possibly // zero) filled Epetra_MultiVector C, and allocated // Epetra_MultiVectors A, B and C_GEMM this routine will generate values for // Epetra_MultiVectors A, B and C_GEMM such that, if A, B and (this) are // used with GEMM in this class, the results should match the results // generated by this routine. // Test for Strided multivectors (required for GEMM ops) if (!A.ConstantStride() || !B.ConstantStride() || !C_GEMM.ConstantStride() || !C.ConstantStride()) return(-1); // Error int i, j; double fi, fj; // Used for casting loop variables to floats // Get a view of the MultiVectors double *Ap = 0; double *Bp = 0; double *Cp = 0; double *C_GEMMp = 0; int A_nrows = A.MyLength(); int A_ncols = A.NumVectors(); int B_nrows = B.MyLength(); int B_ncols = B.NumVectors(); int C_nrows = C.MyLength(); int C_ncols = C.NumVectors(); int A_Stride = 0; int B_Stride = 0; int C_Stride = 0; int C_GEMM_Stride = 0; A.ExtractView(&Ap, &A_Stride); B.ExtractView(&Bp, &B_Stride); C.ExtractView(&Cp, &C_Stride); C_GEMM.ExtractView(&C_GEMMp, &C_GEMM_Stride); // Define some useful constants int opA_ncols = (TransA=='N') ? A.NumVectors() : A.MyLength(); int opB_nrows = (TransB=='N') ? B.MyLength() : B.NumVectors(); int C_global_inner_dim = (TransA=='N') ? A.NumVectors() : A.GlobalLength(); bool A_is_local = (!A.DistributedGlobal()); bool B_is_local = (!B.DistributedGlobal()); bool C_is_local = (!C.DistributedGlobal()); int A_IndexBase = A.Map().IndexBase(); int B_IndexBase = B.Map().IndexBase(); // Build two new maps that we can use for defining global equation indices below Epetra_Map * A_Map = new Epetra_Map(-1, A_nrows, A_IndexBase, A.Map().Comm()); Epetra_Map * B_Map = new Epetra_Map(-1, B_nrows, B_IndexBase, B.Map().Comm()); int* A_MyGlobalElements = new int[A_nrows]; A_Map->MyGlobalElements(A_MyGlobalElements); int* B_MyGlobalElements = new int[B_nrows]; B_Map->MyGlobalElements(B_MyGlobalElements); // Check for compatible dimensions if (C.MyLength() != C_nrows || opA_ncols != opB_nrows || C.NumVectors() != C_ncols || C.MyLength() != C_GEMM.MyLength() || C.NumVectors() != C_GEMM.NumVectors() ) { delete A_Map; delete B_Map; delete [] A_MyGlobalElements; delete [] B_MyGlobalElements; return(-2); // Return error } bool Case1 = ( A_is_local && B_is_local && C_is_local); // Case 1 above bool Case2 = (!A_is_local && !B_is_local && C_is_local && TransA=='T' );// Case 2 bool Case3 = (!A_is_local && B_is_local && !C_is_local && TransA=='N');// Case 3 // Test for meaningful cases if (!(Case1 || Case2 || Case3)) { delete A_Map; delete B_Map; delete [] A_MyGlobalElements; delete [] B_MyGlobalElements; return(-3); // Meaningless case } /* Fill A, B and C with values as follows: If A_is_local is false: A(i,j) = A_MyGlobalElements[i]*j, i=1,...,numLocalEquations, j=1,...,NumVectors else A(i,j) = i*j, i=1,...,numLocalEquations, j=1,...,NumVectors If B_is_local is false: B(i,j) = 1/(A_MyGlobalElements[i]*j), i=1,...,numLocalEquations, j=1,...,NumVectors else B(i,j) = 1/(i*j), i=1,...,numLocalEquations, j=1,...,NumVectors In addition, scale each entry by GlobalLength for A and 1/GlobalLength for B--keeps the magnitude of entries in check C_GEMM will depend on A_is_local and B_is_local. Three cases: 1) A_is_local true and B_is_local true: C_GEMM will be local replicated and equal to A*B = i*NumVectors/j 2) A_is_local false and B_is_local false C_GEMM will be local replicated = A(trans)*B(i,j) = i*numGlobalEquations/j 3) A_is_local false B_is_local true C_GEMM will distributed global and equals A*B = A_MyGlobalElements[i]*NumVectors/j */ // Define a scalar to keep magnitude of entries reasonable double sf = C_global_inner_dim; double sfinv = 1.0/sf; // Define A depending on A_is_local if (A_is_local) { for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) { fi = i+1; // Get float version of i and j, offset by 1. fj = j+1; Ap[i + A_Stride*j] = (fi*sfinv)*fj; } } else { for (j = 0; j <A_ncols ; j++) for (i = 0; i<A_nrows; i++) { fi = A_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1. fj = j+1; Ap[i + A_Stride*j] = (fi*sfinv)*fj; } } // Define B depending on TransB and B_is_local if (B_is_local) { for (j = 0; j <B_ncols ; j++) for (i = 0; i<B_nrows; i++) { fi = i+1; // Get float version of i and j, offset by 1. fj = j+1; Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj); } } else { for (j = 0; j <B_ncols ; j++) for (i = 0; i<B_nrows; i++) { fi = B_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1. fj = j+1; Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj); } } // Define C_GEMM depending on A_is_local and B_is_local. C_GEMM is also a // function of alpha, beta, TransA, TransB: // C_GEMM = alpha*A(TransA)*B(TransB) + beta*C_GEMM if (Case1) { for (j = 0; j <C_ncols ; j++) for (i = 0; i<C_nrows; i++) { // Get float version of i and j, offset by 1. fi = (i+1)*C_global_inner_dim; fj = j+1; C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj) + beta * Cp[i + C_Stride*j]; } } else if (Case2) { for (j = 0; j <C_ncols ; j++) for (i = 0; i<C_nrows; i++) { // Get float version of i and j, offset by 1. fi = (i+1)*C_global_inner_dim; fj = j+1; C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj) + beta * Cp[i + C_Stride*j]; } } else { for (j = 0; j <C_ncols ; j++) for (i = 0; i<C_nrows; i++) { // Get float version of i and j. fi = (A_MyGlobalElements[i]+1)*C_global_inner_dim; fj = j+1; C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj) + beta * Cp[i + C_Stride*j]; } } delete A_Map; delete B_Map; delete [] A_MyGlobalElements; delete [] B_MyGlobalElements; return(0); }
int Ifpack_PointRelaxation:: ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { int* IndexOffset; int* Indices; double* Values; IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values)); int NumVectors = X.NumVectors(); Teuchos::RefCountPtr< Epetra_MultiVector > Y2; if (IsParallel_) { Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); } else Y2 = Teuchos::rcp( &Y, false ); double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; X.ExtractView(&x_ptr); Y.ExtractView(&y_ptr); Y2->ExtractView(&y2_ptr); Diagonal_->ExtractView(&d_ptr); for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { // only one data exchange per sweep if (IsParallel_) IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); if(!DoBackwardGS_){ /* Forward Mode */ for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i=LocalSmoothingIndices_[ii]; int col; double diag = d_ptr[i]; for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } } else { /* Backward Mode */ for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) { int i=LocalSmoothingIndices_[ii]; int col; double diag = d_ptr[i]; for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } } if (IsParallel_) for (int m = 0 ; m < NumVectors ; ++m) for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i=LocalSmoothingIndices_[ii]; y_ptr[m][i] = y2_ptr[m][i]; } } #ifdef IFPACK_FLOPCOUNTERS ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); #endif return(0); } //ApplyInverseGS_LocalFastCrsMatrix()
int LOCA::Epetra::AugmentedOp::ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { // Get number of vectors int n = Input.NumVectors(); // Check num vectors is correct if (importedInput == NULL || n != importedInput->NumVectors()) globalData->locaErrorCheck->throwError( "LOCA::Epetra::AugmentedOp::ApplyInverse()" "Must call init() before ApplyInverse()!"); // Import parameter components importedInput->Import(Input, *extendedImporter, Insert); // get views double **input_view; double **result_view; importedInput->ExtractView(&input_view); Result.ExtractView(&result_view); // get views of paramter components // this is done by setting the pointer for each column to start at // underlyingLength double **input_view_y = new double*[n]; for (int i=0; i<n; i++) input_view_y[i] = input_view[i]+underlyingLength; // break input, result into components Epetra_MultiVector input_x(View, underlyingMap, input_view, n); Epetra_MultiVector input_y(View, localMap, input_view_y, n); Epetra_MultiVector result_x(View, underlyingMap, result_view, n); // copy input_y into result_y result_y->Scale(1.0, input_y); // Temporary epetra vectors Epetra_MultiVector tmp2(c); tmp->PutScalar(0.0); // Solve J*result_x = input_x jacOperator->ApplyInverse(input_x, result_x); if (useTranspose) { // Solve J*tmp = b jacOperator->ApplyInverse(*b, *tmp); // Compute input_y - a^T*result_x result_y->Multiply('T', 'N', -1.0, *a, result_x, 1.0); // Compute c - a^T*tmp tmp2.Multiply('T', 'N', -1.0, *a, *tmp, 1.0); } else { // Solve J*tmp = a jacOperator->ApplyInverse(*a, *tmp); // Compute input_y - b^T*result_x result_y->Multiply('T', 'N', -1.0, *b, result_x, 1.0); // Compute c - b^T*tmp1 tmp2.Multiply('T', 'N', -1.0, *b, *tmp, 1.0); } // Solve (c - z^T*tmp1)^-1 * (input_y - z^T*result_x) where z = a or b int *ipiv = new int[numConstraints]; int info; double *result_y_view; int result_y_lda; result_y->ExtractView(&result_y_view, &result_y_lda); double *tmp2_view; int tmp2_lda; tmp2.ExtractView(&tmp2_view, &tmp2_lda); dlapack.GESV(numConstraints, n, tmp2_view, tmp2_lda, ipiv, result_y_view, result_y_lda, &info); delete [] ipiv; if (info != 0) { globalData->locaErrorCheck->throwError( "LOCA::Epetra::AugmentedOp::ApplyInverse()" "Solve of dense matrix failed!"); } // Compute result_x = result_x - tmp*result_y result_x.Multiply('N', 'N', -1.0, *tmp, *result_y, 1.0); // Set parameter component if (haveParamComponent) for (int j=0; j<n; j++) for (int i=0; i<numConstraints; i++) result_view[j][underlyingLength+i] = (*result_y)[j][i]; delete [] input_view_y; return 0; }
//============================================================================== int Ifpack_PointRelaxation:: ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { int NumVectors = X.NumVectors(); int Length = Matrix().MaxNumEntries(); std::vector<int> Indices(Length); std::vector<double> Values(Length); Teuchos::RefCountPtr< Epetra_MultiVector > Y2; if (IsParallel_) { Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); } else Y2 = Teuchos::rcp( &Y, false ); double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; X.ExtractView(&x_ptr); Y.ExtractView(&y_ptr); Y2->ExtractView(&y2_ptr); Diagonal_->ExtractView(&d_ptr); for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { // only one data exchange per sweep if (IsParallel_) IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; int NumEntries; int col; double diag = d_ptr[i]; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; int NumEntries; int col; double diag = d_ptr[i]; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } if (IsParallel_) for (int m = 0 ; m < NumVectors ; ++m) for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; y_ptr[m][i] = y2_ptr[m][i]; } } #ifdef IFPACK_FLOPCOUNTERS ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); #endif return(0); }
// ================================================ ====== ==== ==== == = // Apply the preconditioner w/ RHS B and get result X int ML_Epetra::LevelWrap::ApplyInverse(const Epetra_MultiVector& B, Epetra_MultiVector& X_) const{ #ifdef ML_TIMING double t_time,t_diff; StartTimer(&t_time); #endif // Sanity Checks if (!B.Map().SameAs(OperatorDomainMap())) return -1; if (!X_.Map().SameAs(OperatorRangeMap())) return -1; if (!X_.Map().SameAs(B.Map())) return -1; if (B.NumVectors() != X_.NumVectors()) return -1; // Build new work vector X Epetra_MultiVector X(X_.Map(),X_.NumVectors(),true); Epetra_MultiVector tmp0(X_.Map(),X_.NumVectors(),true); Epetra_MultiVector tmp1(P0_->DomainMap(),X_.NumVectors(),true); Epetra_MultiVector tmp2(P0_->DomainMap(),X_.NumVectors(),true); double *Bptr, *Xptr; int BLDA, XLDA; // Pre Smoother if(pre_or_post==ML_BOTH || pre_or_post==ML_PRESMOOTHER){ if (use_mlsmoother_) { // ML smoothing is experimental B.ExtractView(&Bptr,&BLDA); X.ExtractView(&Xptr,&XLDA); ML_Smoother_Apply(&(ml_subproblem_->pre_smoother[0]), X.MyLength(), Xptr, X.MyLength(), Bptr, ML_ZERO); } else Smoother_->ApplyInverse(B,X); A0_->Apply(X,tmp0); tmp0.Update(1.0,B,-1.0); } else tmp0 = B; // Form coarse residual if(use_pt_) P0_->Multiply(true,tmp0,tmp1); else R0_->Multiply(false,tmp0,tmp1); // Solve coarse problem A1prec_->ApplyInverse(tmp1,tmp2); // Update solution P0_->Multiply(false,tmp2,tmp0); X.Update(1.0,tmp0,1.0); // Post Smoother if(pre_or_post==ML_BOTH || pre_or_post==ML_POSTSMOOTHER){ if (use_mlsmoother_) { // ML smoothing is experimental Epetra_MultiVector tmp3(X_.Map(),X_.NumVectors(),true); A0_->Apply(X,tmp0); tmp0.Update(1.0,B,-1.0); tmp3.PutScalar(0.0); tmp0.ExtractView(&Bptr,&BLDA); tmp3.ExtractView(&Xptr,&XLDA); ML_Smoother_Apply(&(ml_subproblem_->pre_smoother[0]),X.MyLength(),Xptr, X.MyLength(), Bptr, ML_ZERO); X.Update(1.0,tmp3,1.0); } else Smoother_->ApplyInverse(B,X); } // Copy to output X_=X; #ifdef ML_TIMING StopTimer(&t_time,&t_diff); /* Output */ ML_Comm *comm_; ML_Comm_Create(&comm_); ApplicationTime_+= t_diff; if(FirstApplication_){ FirstApplication_=false; FirstApplicationTime_=ApplicationTime_; }/*end if*/ ML_Comm_Destroy(&comm_); #endif return 0; }