//=============================================================================
int Epetra_MsrMatrix::Multiply(bool TransA,
                               const Epetra_MultiVector& X,
                               Epetra_MultiVector& Y) const
{
  (void)TransA;
  int NumVectors = X.NumVectors();
  if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1);  // X and Y must have same number of vectors

  double ** xptrs;
  double ** yptrs;
  X.ExtractView(&xptrs);
  Y.ExtractView(&yptrs);
  if (RowMatrixImporter()!=0) {
    if (ImportVector_!=0) {
      if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;}
    }
    if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors); // Create import vector if needed
    ImportVector_->Import(X, *RowMatrixImporter(), Insert);
    ImportVector_->ExtractView(&xptrs);
  }
  for (int i=0; i<NumVectors; i++)
    Amat_->matvec(xptrs[i], yptrs[i], Amat_, proc_config_);
  
  double flops = NumGlobalNonzeros();
  flops *= 2.0;
  flops *= (double) NumVectors;
  UpdateFlops(flops);
  return(0);
}
int Epetra_PETScAIJMatrix::Multiply(bool TransA,
                               const Epetra_MultiVector& X,
                               Epetra_MultiVector& Y) const
{
  (void)TransA;
  int NumVectors = X.NumVectors();
  if (NumVectors!=Y.NumVectors()) EPETRA_CHK_ERR(-1);  // X and Y must have same number of vectors

  double ** xptrs;
  double ** yptrs;
  X.ExtractView(&xptrs);
  Y.ExtractView(&yptrs);
  if (RowMatrixImporter()!=0) {
    if (ImportVector_!=0) {
      if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;}
    }
    if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(RowMatrixColMap(),NumVectors);
    ImportVector_->Import(X, *RowMatrixImporter(), Insert);
    ImportVector_->ExtractView(&xptrs);
  }

  double *vals=0;
  int length;
  Vec petscX, petscY;
  int ierr;
  for (int i=0; i<NumVectors; i++) {
#   ifdef HAVE_MPI
    ierr=VecCreateMPIWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr);
    ierr=VecCreateMPIWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr);
#   else //FIXME  untested
    ierr=VecCreateSeqWithArray(Comm_->Comm(),X.MyLength(),X.GlobalLength(),xptrs[i],&petscX); CHKERRQ(ierr);
    ierr=VecCreateSeqWithArray(Comm_->Comm(),Y.MyLength(),Y.GlobalLength(),yptrs[i],&petscY); CHKERRQ(ierr);
#   endif

    ierr = MatMult(Amat_,petscX,petscY);CHKERRQ(ierr);

    ierr = VecGetArray(petscY,&vals);CHKERRQ(ierr);
    ierr = VecGetLocalSize(petscY,&length);CHKERRQ(ierr);
    for (int j=0; j<length; j++) yptrs[i][j] = vals[j];
    ierr = VecRestoreArray(petscY,&vals);CHKERRQ(ierr);
  }

  VecDestroy(petscX); VecDestroy(petscY);
  
  double flops = NumGlobalNonzeros();
  flops *= 2.0;
  flops *= (double) NumVectors;
  UpdateFlops(flops);
  return(0);
} //Multiply()
Epetra_OskiMultiVector::Epetra_OskiMultiVector(const Epetra_MultiVector& Source) 
  : Epetra_MultiVector(Source),
  Epetra_View_(&Source), 
  Copy_Created_(false) {
    double* A;
    double** Aptr;
    int LDA;
    int* LDAptr;
    LDAptr = new int[1];
    Aptr = new double*[1];
    if(Source.ConstantStride() || (Source.NumVectors() == 1)) {
      if(Source.ExtractView(Aptr, LDAptr))
        std::cerr << "Extract view failed\n";
      else
        Oski_View_ = oski_CreateMultiVecView(*Aptr, Source.MyLength(), Source.NumVectors(), LAYOUT_COLMAJ, *LDAptr);
    }
    else {
      Copy_Created_ = true;
      LDA = Source.MyLength();
      A = new double[LDA*Source.NumVectors()];
      if(Source.ExtractCopy(A, LDA))
        std::cerr << "Extract copy failed\n";
      else
        Oski_View_ = oski_CreateMultiVecView(A, Source.MyLength(), Source.NumVectors(), LAYOUT_COLMAJ, LDA);
    }
    delete [] LDAptr;
    delete [] Aptr;
}
Esempio n. 4
0
//EpetraMultiVector_To_TpetraMultiVector: copies Epetra_MultiVector to non-const Tpetra_MultiVector
Teuchos::RCP<Tpetra_MultiVector> Petra::EpetraMultiVector_To_TpetraMultiVector(const Epetra_MultiVector& epetraMV_,
                                                               const Teuchos::RCP<const Teuchos::Comm<int> >& commT_)
{
  //get map from epetraMV_ and convert to Tpetra::Map
  auto mapT = EpetraMap_To_TpetraMap(epetraMV_.Map(), commT_);
  //copy values from epetraMV_
  int numVectors = epetraMV_.NumVectors();
  int Length;
  ST *values;
  epetraMV_.ExtractView(&values, &Length);
  Teuchos::ArrayView<ST> valuesAV = Teuchos::arrayView(values, Length*numVectors);
  //create Tpetra_MultiVector copy of epetraMV_
  Teuchos::RCP<Tpetra_MultiVector> tpetraMV_ = Teuchos::rcp(new Tpetra_MultiVector(mapT, valuesAV, Length, numVectors));
  return tpetraMV_;
}
// Convert a Epetra_MultiVector with assumed block structure dictated by the
// vector space into a Thyra::MultiVectorBase object.
// const Teuchos::RCP<const Thyra::MultiVectorBase<double> > blockEpetraToThyra(const Epetra_MultiVector & e,const Teuchos::RCP<const Thyra::VectorSpaceBase<double> > & vs)
void blockEpetraToThyra(const Epetra_MultiVector & epetraX,const Teuchos::Ptr<Thyra::MultiVectorBase<double> > & thyraX) 
{
   TEUCHOS_ASSERT(thyraX->range()->dim()==epetraX.GlobalLength());

   // extract local information from the Epetra_MultiVector
   int leadingDim=0,numVectors=0,localDim=0;
   double * epetraData=0;
   epetraX.ExtractView(&epetraData,&leadingDim);

   numVectors = epetraX.NumVectors();

   blockEpetraToThyra(numVectors,epetraData,leadingDim,thyraX.ptr(),localDim);   

   TEUCHOS_ASSERT(localDim==epetraX.MyLength());
}
// Convert a Thyra::MultiVectorBase object to a Epetra_MultiVector object with
// the map defined by the Epetra_Map.
// const Teuchos::RCP<const Epetra_MultiVector> 
// blockThyraToEpetra(const Teuchos::RCP<const Thyra::MultiVectorBase<double> > & tX,const RCP<const Epetra_Map> & map)
void blockThyraToEpetra(const Teuchos::RCP<const Thyra::MultiVectorBase<double> > & thyraX,Epetra_MultiVector & epetraX)
{
   // build an Epetra_MultiVector object
   int numVectors = thyraX->domain()->dim();

   // make sure the number of vectors are the same
   TEUCHOS_ASSERT(numVectors==epetraX.NumVectors());
   TEUCHOS_ASSERT(thyraX->range()->dim()==epetraX.GlobalLength());

   // extract local information from the Epetra_MultiVector
   int leadingDim=0,localDim=0;
   double * epetraData=0;
   epetraX.ExtractView(&epetraData,&leadingDim);

   // perform recursive copy
   blockThyraToEpetra(numVectors,epetraData,leadingDim,thyraX,localDim);

   // sanity check
   TEUCHOS_ASSERT(localDim==epetraX.Map().NumMyElements());
}
//==============================================================================
int Ifpack_PointRelaxation::
ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int NumVectors = X.NumVectors();

  int Length = Matrix().MaxNumEntries();
  std::vector<int> Indices(Length);
  std::vector<double> Values(Length);

  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_)
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  else
    Y2 = Teuchos::rcp( &Y, false );

  // extract views (for nicer and faster code)
  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int j = 0; j < NumSweeps_ ; j++) {

    // data exchange is here, once per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    // FIXME: do I really need this code below?
    if (NumVectors == 1) {

      double* y0_ptr = y_ptr[0];
      double* y20_ptr = y2_ptr[0];
      double* x0_ptr = x_ptr[0];

      if(!DoBackwardGS_){
        /* Forward Mode */
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

          int NumEntries;
          int col;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          double dtemp = 0.0;
          for (int k = 0 ; k < NumEntries ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y20_ptr[col];
          }

          y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
        }
      }
      else {
        /* Backward Mode */
        for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
          int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

          int NumEntries;
          int col;
          (void) col; // Forestall compiler warning for unused variable.
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));
          double dtemp = 0.0;
          for (int k = 0 ; k < NumEntries ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y20_ptr[i];
          }

          y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
        }
      }

      // using Export() sounded quite expensive
      if (IsParallel_)
        for (int i = 0 ; i < NumMyRows_ ; ++i)
          y0_ptr[i] = y20_ptr[i];

    }
    else {
      if(!DoBackwardGS_){
        /* Forward Mode */
        for (int i = 0 ; i < NumMyRows_ ; ++i) {

          int NumEntries;
          int col;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          for (int m = 0 ; m < NumVectors ; ++m) {

            double dtemp = 0.0;
            for (int k = 0 ; k < NumEntries ; ++k) {

              col = Indices[k];
              dtemp += Values[k] * y2_ptr[m][col];
            }

            y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
          }
        }
      }
      else {
        /* Backward Mode */
        for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
          int NumEntries;
          int col;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          for (int m = 0 ; m < NumVectors ; ++m) {

            double dtemp = 0.0;
            for (int k = 0 ; k < NumEntries ; ++k) {

              col = Indices[k];
              dtemp += Values[k] * y2_ptr[m][col];
            }

            y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);

          }
        }
      }

      // using Export() sounded quite expensive
      if (IsParallel_)
        for (int m = 0 ; m < NumVectors ; ++m)
          for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
            int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
            y_ptr[m][i] = y2_ptr[m][i];
          }
    }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
#endif

  return(0);
} //ApplyInverseGS_RowMatrix()
int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{
  if(!IsComputed_) return -1;
  Time_.ResetStartTime();
  bool initial_guess_is_zero=false;
  const int lclNumRows = W_->NumMyRows();
  const int NumVectors = X.NumVectors();
  Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors);

  double omega=GetOmega();

  // need to create an auxiliary vector, Xcopy
  Teuchos::RCP<const Epetra_MultiVector> Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0]){
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
    // Since the user didn't give us anything better, our initial guess is zero.
    Y.Scale(0.0);
    initial_guess_is_zero=true;
  }
  else
    Xcopy = Teuchos::rcp( &X, false );

  Teuchos::RCP< Epetra_MultiVector > T2;
  // Note: Assuming that the matrix has an importer.  Ifpack_PointRelaxation doesn't do this, but given that
  // I have a CrsMatrix, I'm probably OK.
  // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine
  // if things are on or off processor.
  // Note: T2 must be zero'd out
  if (IsParallel_ && W_->Importer())  T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true));
  else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true));

  // Pointer grabs
  int* rowptr,*colind;
  double *values;
  double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr;
  T2->ExtractView(&t2_ptr);
  Y.ExtractView(&y_ptr);
  Temp.ExtractView(&t_ptr);
  Xcopy->ExtractView(&x_ptr);
  Wdiag_->ExtractView(&d_ptr);
  IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values));


  for(int i=0; i<NumSweeps_; i++){
    // Calculate b-Ax
    if(!initial_guess_is_zero  || i > 0) {
      A_->Apply(Y,Temp);
      Temp.Update(1.0,*Xcopy,-1.0);
    }
    else
      Temp.Update(1.0,*Xcopy,0.0);

    // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated
    // in this sweep before they are used, so we don't need to reset T2 to zero here.

    // Do backsolve & update
    // x = x  + W^{-1} (b - A x)
    for(int j=0; j<lclNumRows; j++){
      double diag=d_ptr[j];
      for (int m=0 ; m<NumVectors; m++) {
        double dtmp=0.0;
        // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute.
        t2_ptr[m][j]=0.0;
        for(int k=rowptr[j];k<rowptr[j+1];k++){
          dtmp+= values[k]*t2_ptr[m][colind[k]];
        }
        // Yes, we need to update both of these.
        t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag;
        y_ptr[m][j] += omega*t2_ptr[m][j];
      }
    }
  }

  // Counter update
  NumApplyInverse_++;
  ApplyInverseTime_ += Time_.ElapsedTime();
  return 0;
}
Esempio n. 9
0
//==============================================================================
int Ifpack_Chebyshev::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  if (PolyDegree_ == 0)
    return 0;

  int nVec = X.NumVectors();
  int len = X.MyLength();
  if (nVec != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  // AztecOO gives X and Y pointing to the same memory location,
  // need to create an auxiliary vector, Xcopy
  Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0])
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
  else
    Xcopy = Teuchos::rcp( &X, false );

  double **xPtr = 0, **yPtr = 0;
  Xcopy->ExtractView(&xPtr);
  Y.ExtractView(&yPtr);

#ifdef HAVE_IFPACK_EPETRAEXT
  EpetraExt_PointToBlockDiagPermute* IBD=0;
  if (UseBlockMode_) IBD=&*InvBlockDiagonal_;
#endif
  

  //--- Do a quick solve when the matrix is identity
  double *invDiag=0;
  if(!UseBlockMode_) invDiag=InvDiagonal_->Values();
  if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_)) {
#ifdef HAVE_IFPACK_EPETRAEXT
    if(UseBlockMode_) IBD->ApplyInverse(*Xcopy,Y);
    else
#endif
    if (nVec == 1) {
      double *yPointer = yPtr[0], *xPointer = xPtr[0];
      for (int i = 0; i < len; ++i)
        yPointer[i] = xPointer[i]*invDiag[i];
    }
    else {
      int i, k;
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i];
        for (k = 0; k < nVec; ++k)
          yPtr[k][i] = xPtr[k][i] * coeff;
      }
    } // if (nVec == 1)
    return 0;
  } // if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_))

  //--- Initialize coefficients
  // Note that delta stores the inverse of ML_Cheby::delta
  double alpha = LambdaMax_ / EigRatio_;
  double beta = 1.1 * LambdaMax_;
  double delta = 2.0 / (beta - alpha);
  double theta = 0.5 * (beta + alpha);
  double s1 = theta * delta;

  //--- Define vectors
  // In ML_Cheby, V corresponds to pAux and W to dk
  Epetra_MultiVector V(X);
  Epetra_MultiVector W(X);
#ifdef HAVE_IFPACK_EPETRAEXT
  Epetra_MultiVector Temp(X);
#endif
  
  double *vPointer = V.Values(), *wPointer = W.Values();

  double oneOverTheta = 1.0/theta;
  int i, j, k;


  //--- If solving normal equations, multiply RHS by A^T
  if(SolveNormalEquations_){
    Apply_Transpose(Operator_,Y,V);
    Y=V;
  }

  // Do the smoothing when block scaling is turned OFF
  // --- Treat the initial guess
  if (ZeroStartingSolution_ == false) {
    Operator_->Apply(Y, V);
    // Compute W = invDiag * ( X - V )/ Theta
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      Temp.Update(oneOverTheta,X,-oneOverTheta,V,0.0);
      IBD->ApplyInverse(Temp,W);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(W,Temp);
	Apply_Transpose(Operator_,Temp,W);
      }
    }
    else
#endif
    if (nVec == 1) {
      double *xPointer = xPtr[0];
      for (i = 0; i < len; ++i)
        wPointer[i] = invDiag[i] * (xPointer[i] - vPointer[i]) * oneOverTheta;
    }
    else {
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*oneOverTheta;
        double *wi = wPointer + i, *vi = vPointer + i;
        for (k = 0; k < nVec; ++k) {
          *wi = (xPtr[k][i] - (*vi)) * coeff;
          wi = wi + len; vi = vi + len;
        }
      }
    } // if (nVec == 1)
    // Update the vector Y
    Y.Update(1.0, W, 1.0);
  }
  else {
    // Compute W = invDiag * X / Theta
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      IBD->ApplyInverse(X,W);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(W,Temp);
	Apply_Transpose(Operator_,Temp,W);
      }

      W.Scale(oneOverTheta);
      Y.Update(1.0, W, 0.0);      
    }
    else
#endif
    if (nVec == 1) {
      double *xPointer = xPtr[0];
      for (i = 0; i < len; ++i){
        wPointer[i] = invDiag[i] * xPointer[i] * oneOverTheta;
      }
      memcpy(yPtr[0], wPointer, len*sizeof(double));
    }
    else {
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*oneOverTheta;
        double *wi = wPointer + i;
        for (k = 0; k < nVec; ++k) {
          *wi = xPtr[k][i] * coeff;
          wi = wi + len;
        }
      }
      for (k = 0; k < nVec; ++k)
        memcpy(yPtr[k], wPointer + k*len, len*sizeof(double));
    } // if (nVec == 1)
  } // if (ZeroStartingSolution_ == false)
  
  //--- Apply the polynomial
  double rhok = 1.0/s1, rhokp1;
  double dtemp1, dtemp2;
  int degreeMinusOne = PolyDegree_ - 1;
  if (nVec == 1) {
    double *xPointer = xPtr[0];
    for (k = 0; k < degreeMinusOne; ++k) {
      Operator_->Apply(Y, V);
      rhokp1 = 1.0 / (2.0*s1 - rhok);
      dtemp1 = rhokp1 * rhok;
      dtemp2 = 2.0 * rhokp1 * delta;
      rhok = rhokp1;
      // Compute W = dtemp1 * W
      W.Scale(dtemp1);
      // Compute W = W + dtemp2 * invDiag * ( X - V )
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      //NTS: We can clobber V since it will be reset in the Apply
      V.Update(dtemp2,X,-dtemp2);
      IBD->ApplyInverse(V,Temp);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(V,Temp);
	Apply_Transpose(Operator_,Temp,V);
      }

      W.Update(1.0,Temp,1.0);
    }
    else{
#endif
      for (i = 0; i < len; ++i)
        wPointer[i] += dtemp2* invDiag[i] * (xPointer[i] - vPointer[i]);
#ifdef HAVE_IFPACK_EPETRAEXT
    }
#endif

      // Update the vector Y
      Y.Update(1.0, W, 1.0);
    } // for (k = 0; k < degreeMinusOne; ++k)
  }
  else {
    for (k = 0; k < degreeMinusOne; ++k) {
      Operator_->Apply(Y, V);
      rhokp1 = 1.0 / (2.0*s1 - rhok);
      dtemp1 = rhokp1 * rhok;
      dtemp2 = 2.0 * rhokp1 * delta;
      rhok = rhokp1;
      // Compute W = dtemp1 * W
      W.Scale(dtemp1);
      // Compute W = W + dtemp2 * invDiag * ( X - V )
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      //We can clobber V since it will be reset in the Apply
      V.Update(dtemp2,X,-dtemp2);
      IBD->ApplyInverse(V,Temp);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(V,Temp);
	Apply_Transpose(Operator_,Temp,V);
      }


      W.Update(1.0,Temp,1.0);
    }
    else{
#endif
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*dtemp2;
        double *wi = wPointer + i, *vi = vPointer + i;
        for (j = 0; j < nVec; ++j) {
          *wi += (xPtr[j][i] - (*vi)) * coeff;
          wi = wi + len; vi = vi + len;
        }
      }
#ifdef HAVE_IFPACK_EPETRAEXT
    }
#endif      
      // Update the vector Y
      Y.Update(1.0, W, 1.0);
    } // for (k = 0; k < degreeMinusOne; ++k)
  } // if (nVec == 1)

  
  // Flops are updated in each of the following. 
  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
Esempio n. 10
0
int Amesos_Scalapack::Solve() { 
  
  if( debug_ == 1 ) std::cout << "Entering `Solve()'" << std::endl;
  
  NumSolve_++;
  
  Epetra_MultiVector   *vecX = Problem_->GetLHS() ; 
  Epetra_MultiVector   *vecB = Problem_->GetRHS() ; 
  
  //
  //  Compute the number of right hands sides 
  //  (and check that X and B have the same shape) 
  //
  int nrhs; 
  if ( vecX == 0 ) { 
    nrhs = 0 ;
    EPETRA_CHK_ERR( vecB != 0 ) ; 
  } else { 
    nrhs = vecX->NumVectors() ; 
    EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; 
  }
  
  Epetra_MultiVector *ScalapackB =0;
  Epetra_MultiVector *ScalapackX =0;
  //
  //  Extract Scalapack versions of X and B 
  //
  double *ScalapackXvalues ;
  
  Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
  Time_->ResetStartTime(); // track time to broadcast vectors
  //
  //  Copy B to the scalapack version of B
  //
  const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap();
  Epetra_MultiVector *ScalapackXextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; 
  Epetra_MultiVector *ScalapackBextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; 
  
  Epetra_Import ImportToScalapack( *VectorMap_, OriginalMap );
  ScalapackBextract->Import( *vecB, ImportToScalapack, Insert ) ;
  ScalapackB = ScalapackBextract ; 
  ScalapackX = ScalapackXextract ; 
  
  VecTime_ += Time_->ElapsedTime();
  
  //
  //  Call SCALAPACKs PDGETRS to perform the solve
  //
  
  int DescX[10];  
  
  ScalapackX->Scale(1.0, *ScalapackB) ;  
  
  int ScalapackXlda ; 
  
  Time_->ResetStartTime(); // tract time to solve
  
  //
  //  Setup DescX 
  //
  
  if( nrhs > nb_ ) {
    EPETRA_CHK_ERR( -2 );  
  }
  
  int Ierr[1] ; 
  Ierr[0] = 0 ; 
  const int zero = 0 ; 
  const int one = 1 ; 
  if ( iam_ < nprow_ * npcol_ ) {
    assert( ScalapackX->ExtractView( &ScalapackXvalues, &ScalapackXlda ) == 0 ) ; 
    
    if ( false ) std::cout << "Amesos_Scalapack.cpp: " << __LINE__ << " ScalapackXlda = "  <<  ScalapackXlda 
		      << " lda_ = "  << lda_ 
		      << " nprow_ = "  << nprow_ 
		      << " npcol_ = "  << npcol_ 
		      << " myprow_ = "  << myprow_ 
		      << " mypcol_ = "  << mypcol_ 
		      << " iam_ = "  << iam_ << std::endl ;
    if (  TwoD_distribution_ )    assert( mypcol_ >0 || EPETRA_MAX(ScalapackXlda,1) == lda_ ) ; 
    
    DESCINIT_F77(DescX, 
		 &NumGlobalElements_, 
		 &nrhs, 
		 &nb_,
		 &nb_,
		 &zero,
		 &zero,
		 &ictxt_,
		 &lda_,
		 Ierr ) ;
    assert( Ierr[0] == 0 ) ; 
    
    //
    //  For the 1D data distribution, we factor the transposed 
    //  matrix, hence we must invert the sense of the transposition
    //
    char trans = 'N';
    if ( TwoD_distribution_ ) {
      if ( UseTranspose() ) trans = 'T' ;
    } else {
      
      if ( ! UseTranspose() ) trans = 'T' ;
    }
    
    if ( nprow_ * npcol_ == 1 ) { 
      DGETRS_F77(&trans,
		 &NumGlobalElements_,  
		 &nrhs, 
		 &DenseA_[0],
		 &lda_,
		 &Ipiv_[0],
		 ScalapackXvalues,
		 &lda_,
		 Ierr ) ;
    } else { 
      PDGETRS_F77(&trans,
		  &NumGlobalElements_,  
		  &nrhs, 
		  &DenseA_[0],
		  &one,
		  &one, 
		  DescA_,
		  &Ipiv_[0],
		  ScalapackXvalues,
		  &one,
		  &one, 
		  DescX,
		  Ierr ) ;
    }
  }
  
  SolTime_ += Time_->ElapsedTime();
  
  Time_->ResetStartTime();  // track time to broadcast vectors
  //
  //  Copy X back to the original vector
  // 
  Epetra_Import ImportFromScalapack( OriginalMap, *VectorMap_ );
  vecX->Import( *ScalapackX, ImportFromScalapack, Insert ) ;
  delete ScalapackBextract ;
  delete ScalapackXextract ;
  
  VecTime_ += Time_->ElapsedTime();
  
  //  All processes should return the same error code
  if ( nprow_ * npcol_ < Comm().NumProc() ) 
    Comm().Broadcast( Ierr, 1, 0 ) ; 
  
  // MS // compute vector norms
  if( ComputeVectorNorms_ == true || verbose_ == 2 ) {
    double NormLHS, NormRHS;
    for( int i=0 ; i<nrhs ; ++i ) {
      assert((*vecX)(i)->Norm2(&NormLHS)==0);
      assert((*vecB)(i)->Norm2(&NormRHS)==0);
      if( verbose_ && Comm().MyPID() == 0 ) {
	std::cout << "Amesos_Scalapack : vector " << i << ", ||x|| = " << NormLHS
	     << ", ||b|| = " << NormRHS << std::endl;
      }
    }
  }
  
  // MS // compute true residual
  if( ComputeTrueResidual_ == true || verbose_ == 2  ) {
    double Norm;
    Epetra_MultiVector Ax(vecB->Map(),nrhs);
    for( int i=0 ; i<nrhs ; ++i ) {
      (Problem_->GetMatrix()->Multiply(UseTranspose(), *((*vecX)(i)), Ax));
      (Ax.Update(1.0, *((*vecB)(i)), -1.0));
      (Ax.Norm2(&Norm));
      
      if( verbose_ && Comm().MyPID() == 0 ) {
	std::cout << "Amesos_Scalapack : vector " << i << ", ||Ax - b|| = " << Norm << std::endl;
      }
    }
  }
  
  return Ierr[0];
  
}
int ShyLU_Probing_Operator::Apply(const Epetra_MultiVector &X,
            Epetra_MultiVector &Y) const
{
#ifdef TIMING_OUTPUT
    apply_time_->start();
#endif

    int nvectors = X.NumVectors();
    bool local = (C_->Comm().NumProc() == 1);
    int err;
    //cout << "No of colors after probing" << nvectors << endl;

#ifdef TIMING_OUTPUT
    matvec_time_->start();
#endif

    err = G_->Multiply(false, X, *temp2);
    assert(err == 0);
    if (!local)
        err = C_->Multiply(false, X, *temp);
    else
    {
        // localize X
        double *values;
        int mylda;
        X.ExtractView(&values, &mylda);

       Epetra_SerialComm LComm;        // Use Serial Comm for the local blocks.
       Epetra_Map SerialMap(X.Map().NumMyElements(), X.Map().NumMyElements(),
                   X.Map().MyGlobalElements(), 0, LComm);
       Epetra_MultiVector Xl(View, SerialMap, values, mylda, X.NumVectors());
       err = C_->Multiply(false, Xl, *temp);
    }
    assert(err == 0);

#ifdef TIMING_OUTPUT
    matvec_time_->stop();
#endif

    int nrows = C_->RowMap().NumMyElements();

#ifdef DEBUG
    cout << "DEBUG MODE" << endl;
    assert(nrows == localDRowMap_->NumGlobalElements());

    int gids[nrows], gids1[nrows];
    C_->RowMap().MyGlobalElements(gids);
    localDRowMap_->MyGlobalElements(gids1);

    for (int i = 0; i < nrows; i++)
    {
       assert(gids[i] == gids1[i]);
    }
#endif

#ifdef TIMING_OUTPUT
    localize_time_->start();
#endif

    //int err;
    int lda;
    double *values;
    if (!local)
    {
        err = temp->ExtractView(&values, &lda);
        assert (err == 0);

        // copy to local vector //TODO: OMP parallel
        assert(lda == nrows);

    //#pragma omp parallel for shared(nvectors, nrows, values)
        for (int v = 0; v < nvectors; v++)
        {
           for (int i = 0; i < nrows; i++)
           {
               err = ltemp->ReplaceMyValue(i, v, values[i+v*lda]);
               assert (err == 0);
           }
        }
    }

#ifdef TIMING_OUTPUT
    localize_time_->stop();
    trisolve_time_->start();
#endif

    if (!local)
    {
        LP_->SetRHS(ltemp.getRawPtr());
    }
    else
    {
        //LP_->SetRHS(temp.getRawPtr());
    }
    //LP_->SetLHS(localX.getRawPtr());

    //TODO: Why not just in Reset(). Check the distr path.
    ssym_->OrigLP->SetLHS(localX.getRawPtr());
    ssym_->OrigLP->SetRHS(temp.getRawPtr());
    ssym_->ReIdx_LP->fwd();
    solver_->Solve();

#ifdef TIMING_OUTPUT
    trisolve_time_->stop();
    dist_time_->start();
#endif

    if (!local)
    {
        err = localX->ExtractView(&values, &lda);
        assert (err == 0);

        //Copy back to dist vector //TODO: OMP parallel
    //#pragma omp parallel for
        for (int v = 0; v < nvectors; v++)
        {
           for (int i = 0; i < nrows; i++)
           {
               err = temp->ReplaceMyValue(i, v, values[i+v*lda]);
               assert (err == 0);
           }
        }
    }

#ifdef TIMING_OUTPUT
    dist_time_->stop();
    matvec2_time_->start();
#endif

    if (!local)
    {
        R_->Multiply(false, *temp, Y);
    }
    else
    {
        // Should Y be localY in Multiply and then exported to Y ?? TODO:
        // Use view mode ?
        double *values;
        int mylda;
        Y.ExtractView(&values, &mylda);

       Epetra_SerialComm LComm;        // Use Serial Comm for the local blocks.
       Epetra_Map SerialMap(Y.Map().NumMyElements(), Y.Map().NumMyElements(),
                   Y.Map().MyGlobalElements(), 0, LComm);
       Epetra_MultiVector Yl(View, SerialMap, values, mylda, Y.NumVectors());
        R_->Multiply(false, *localX, Yl);
    }

#ifdef TIMING_OUTPUT
    matvec2_time_->stop();
    update_time_->start();
#endif

    err = Y.Update(1.0, *temp2, -1.0);
    //cout << Y.MyLength() << " " << temp2.MyLength() << endl;
    assert(err == 0);

#ifdef TIMING_OUTPUT
    update_time_->stop();
    apply_time_->stop();
#endif
    cntApply++;
    return 0;
}
Esempio n. 12
0
int  BuildMultiVectorTests (Epetra_MultiVector & C, const double alpha, 
				Epetra_MultiVector& A, 
				Epetra_MultiVector& sqrtA,
				Epetra_MultiVector& B,
				Epetra_MultiVector& C_alphaA,
				Epetra_MultiVector& C_alphaAplusB,
				Epetra_MultiVector& C_plusB,
				double* const dotvec_AB,
				double* const norm1_A,
				double* const norm2_sqrtA,
				double* const norminf_A,
				double* const normw_A,
				Epetra_MultiVector& Weights,
				double* const minval_A,
				double* const maxval_A,
				double* const meanval_A ) {

  // For given values alpha and a (possibly zero) filled 
  // Epetra_MultiVector (the this object), allocated double * arguments dotvec_AB, 
  // norm1_A, and norm2_A, and allocated Epetra_MultiVectors A, sqrtA,
  // B, C_alpha, C_alphaAplusB and C_plusB, this method will generate values for 
  // Epetra_MultiVectors A, B and all of the additional arguments on
  // the list above such that, if A, B and (this) are used with the methods in 
  // this class, the results should match the results generated by this routine.
  // Specifically, the results in dotvec_AB should match those from a call to 
  // A.dotProd (B,dotvec).  Similarly for other routines.
  
  int i,j;
  double fi, fj;  // Used for casting loop variables to floats
  // Define some useful constants
  
  int A_nrows = A.MyLength();
  int A_ncols = A.NumVectors();
  int sqrtA_nrows = sqrtA.MyLength();
  int sqrtA_ncols = sqrtA.NumVectors();
  int B_nrows = B.MyLength();
  int B_ncols = B.NumVectors();
  
  double **Ap = 0;
  double **sqrtAp = 0;
  double **Bp = 0;
  double **Cp = 0;
  double **C_alphaAp = 0;
  double **C_alphaAplusBp = 0;
  double **C_plusBp = 0;
  double **Weightsp = 0;

  A.ExtractView(&Ap);
  sqrtA.ExtractView(&sqrtAp);
  B.ExtractView(&Bp);
  C.ExtractView(&Cp);
  C_alphaA.ExtractView(&C_alphaAp);
  C_alphaAplusB.ExtractView(&C_alphaAplusBp);
  C_plusB.ExtractView(&C_plusBp);
  Weights.ExtractView(&Weightsp);
  
  bool A_is_local = (A.MyLength() == A.GlobalLength());
  bool B_is_local = (B.MyLength() == B.GlobalLength());
  bool C_is_local = (C.MyLength()    == C.GlobalLength());

  int A_IndexBase = A.Map().IndexBase();
  int B_IndexBase = B.Map().IndexBase();
  
    // Build two new maps that we can use for defining global equation indices below
    Epetra_Map * A_Map = new Epetra_Map(-1, A_nrows, A_IndexBase, A.Map().Comm());
    Epetra_Map * B_Map = new Epetra_Map(-1, B_nrows, B_IndexBase, B.Map().Comm());

    int* A_MyGlobalElements = new int[A_nrows];
    A_Map->MyGlobalElements(A_MyGlobalElements);
    int* B_MyGlobalElements = new int[B_nrows];
    B_Map->MyGlobalElements(B_MyGlobalElements);

  // Check for compatible dimensions
  
  if (C.MyLength()        != A_nrows     ||
      A_nrows        != B_nrows     ||
      C.NumVectors()    != A_ncols     ||
      A_ncols        != B_ncols     ||
      sqrtA_nrows    != A_nrows     ||
      sqrtA_ncols    != A_ncols     ||
      C.MyLength()        != C_alphaA.MyLength()     ||
      C.NumVectors()    != C_alphaA.NumVectors() ||
      C.MyLength()        != C_alphaAplusB.MyLength()     ||
      C.NumVectors()    != C_alphaAplusB.NumVectors() ||
      C.MyLength()        != C_plusB.MyLength()      ||
      C.NumVectors()    != C_plusB.NumVectors()     ) return(-2); // Return error
  
 
  bool Case1 = ( A_is_local &&  B_is_local &&  C_is_local);  // Case 1
  bool Case2 = (!A_is_local && !B_is_local && !C_is_local);// Case 2
  
  // Test for meaningful cases
  
  if (!(Case1 || Case2)) return(-3); // Meaningless case
  
  /* Fill A and B with values as follows:
     
     If A_is_local is false:
     A(i,j) = A_MyGlobalElements[i]*j,     i=1,...,numLocalEquations, j=1,...,NumVectors
     
     else
     A(i,j) = i*j,     i=1,...,numLocalEquations, j=1,...,NumVectors

     If B_is_local is false:
     B(i,j) = 1/(A_MyGlobalElements[i]*j), i=1,...,numLocalEquations, j=1,...,NumVectors

     else
     B(i,j) = 1/(i*j), i=1,...,numLocalEquations, j=1,...,NumVectors

     In addition, scale each entry by GlobalLength for A and
     1/GlobalLength for B--keeps the magnitude of entries in check
  */

  //Define scale factor

  double sf = A.GlobalLength();
  double sfinv = 1.0/sf;

  // Define A

  if (A_is_local)
    {
      for (j = 0; j <A_ncols ; j++) 
	{
	  for (i = 0; i<A_nrows; i++)
	    { 
	      fi = i+1; // Get float version of i and j, offset by 1.
	      fj = j+1;
	      Ap[j][i] = (fi*sfinv)*fj;
	      sqrtAp[j][i] = std::sqrt(Ap[j][i]);
	    }
	}
    }
  else
    {
      for (j = 0; j <A_ncols ; j++) 
	{
	  for (i = 0; i<A_nrows; i++)
	    { 
	      fi = A_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1.
	      fj = j+1;
	      Ap[j][i] = (fi*sfinv)*fj;
	      sqrtAp[j][i] = std::sqrt(Ap[j][i]);
	    }
	}
    }

  // Define B depending on TransB and B_is_local
  
  if (B_is_local)
    {
      for (j = 0; j <B_ncols ; j++) 
	{
	  for (i = 0; i<B_nrows; i++)
	    { 
	      fi = i+1; // Get float version of i and j, offset by 1.
	      fj = j+1;
	      Bp[j][i] = 1.0/((fi*sfinv)*fj);
	    }
	}
    }
  else
    {
      for (j = 0; j <B_ncols ; j++) 
	{
	  for (i = 0; i<B_nrows; i++)
	    { 
	      fi = B_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1.
	      fj = j+1;
	      Bp[j][i] = 1.0/((fi*sfinv)*fj);
	    }
	}
    }
  
  // Generate C_alphaA = alpha * A

  for (j = 0; j <A_ncols ; j++) 
      for (i = 0; i<A_nrows; i++)
	  C_alphaAp[j][i] = alpha * Ap[j][i];

  // Generate C_alphaA = alpha * A + B

  for (j = 0; j <A_ncols ; j++) 
    for (i = 0; i<A_nrows; i++)
      C_alphaAplusBp[j][i] = alpha * Ap[j][i] + Bp[j][i];
  
  // Generate C_plusB = this + B

  for (j = 0; j <A_ncols ; j++) 
    for (i = 0; i<A_nrows; i++)
      C_plusBp[j][i] = Cp[j][i] + Bp[j][i];

  // Generate dotvec_AB.  Because B(i,j) = 1/A(i,j), dotvec[i] =  C.GlobalLength()

  for (i=0; i< A.NumVectors(); i++) dotvec_AB[i] = C.GlobalLength();

  // For the next two results we want to be careful how we do arithmetic 
  // to avoid very large numbers.
  // We are computing sfinv*(C.GlobalLength()*(C.GlobalLength()+1)/2)

      double result = C.GlobalLength();
      result *= sfinv;
      result /= 2.0;
      result *= (double)(C.GlobalLength()+1);

   // Generate norm1_A.  Can use formula for sum of first n integers.

  for (i=0; i< A.NumVectors(); i++) 
    // m1_A[i] = (i+1)*C.GlobalLength()*(C.GlobalLength()+1)/2;
    norm1_A[i] = result * ((double) (i+1));

  // Generate norm2_sqrtA.  Can use formula for sum of first n integers. 

  for (i=0; i< A.NumVectors(); i++) 
    // norm2_sqrtA[i] = std::sqrt((double) ((i+1)*C.GlobalLength()*(C.GlobalLength()+1)/2));
    norm2_sqrtA[i] = std::sqrt(result * ((double) (i+1)));

  // Generate norminf_A, minval_A, maxval_A, meanval_A. 

  for (i=0; i< A.NumVectors(); i++) 
    {
    norminf_A[i] = (double) (i+1);
    minval_A[i] =  (double) (i+1)/ (double) A.GlobalLength();
    maxval_A[i] = (double) (i+1);
    meanval_A[i] = norm1_A[i]/((double) (A.GlobalLength()));
    }

  // Define weights and expected weighted norm
  for (i=0; i< A.NumVectors(); i++) 
    {
      double ip1 = (double) i+1;
      normw_A[i] = ip1;
      for (j=0; j<A_nrows; j++) Weightsp[i][j] = Ap[i][j]/ip1;
    }
  

  delete A_Map;
  delete B_Map;
  delete [] A_MyGlobalElements;
  delete [] B_MyGlobalElements;
  
  return(0);
}
int 
LOCA::Epetra::AugmentedOp::Apply(const Epetra_MultiVector& Input, 
				 Epetra_MultiVector& Result) const
{

  // Get number of vectors
  int n = Input.NumVectors();

  // Check num vectors is correct
  if (importedInput == NULL || n != importedInput->NumVectors())
    globalData->locaErrorCheck->throwError("LOCA::Epetra::AugmentedOp::Apply()"
					   "Must call init() before Apply()!");

  // Import parameter components
  importedInput->Import(Input, *extendedImporter, Insert);

  // get views
  double **input_view;
  double **result_view;
  importedInput->ExtractView(&input_view);
  Result.ExtractView(&result_view);

  // get views of paramter components
  // this is done by setting the pointer for each column to start at
  // underlyingLength
  double **input_view_y = new double*[n];
  for (int i=0; i<n; i++)
    input_view_y[i] = input_view[i]+underlyingLength;

  // break input, result into components
  Epetra_MultiVector input_x(View, underlyingMap, input_view, n);
  Epetra_MultiVector input_y(View, localMap, input_view_y, n);
  Epetra_MultiVector result_x(View, underlyingMap, result_view, n);

  // Compute J*input_x
  jacOperator->Apply(input_x, result_x);

  if (useTranspose) {
    
    // Compute J^T*input_x + b*input_y
    result_x.Multiply('N', 'N', 1.0, *b, input_y, 1.0);

    // Compute a^T*input_x + c^T*input_y
    result_y->Multiply('T', 'N', 1.0, *a, input_x, 0.0);
    result_y->Multiply('T', 'N', 1.0, c, input_y, 1.0);

  }
  else {
    
    // Compute J*input_x + a*input_y
    result_x.Multiply('N', 'N', 1.0, *a, input_y, 1.0);

    // Compute b^T*input_x + c*input_y
    result_y->Multiply('T', 'N', 1.0, *b, input_x, 0.0);
    result_y->Multiply('N', 'N', 1.0, c, input_y, 1.0);

  }

  // Set parameter component
  if (haveParamComponent)
    for (int j=0; j<n; j++)
      for (int i=0; i<numConstraints; i++)
	result_view[j][underlyingLength+i] = (*result_y)[j][i];

   delete [] input_view_y;

  return 0;
}
Esempio n. 14
0
int main(int argc, char *argv[])
{

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

#define ML_SCALING
#ifdef ML_SCALING
   const int ntimers=4;
   enum {total, probBuild, precBuild, solve};
   ml_DblLoc timeVec[ntimers], maxTime[ntimers], minTime[ntimers];

  for (int i=0; i<ntimers; i++) timeVec[i].rank = Comm.MyPID();
  timeVec[total].value = MPI_Wtime();
#endif

  int nx;
  if (argc > 1) nx = (int) strtol(argv[1],NULL,10);
  else          nx = 256;

  if (nx < 1) nx = 256; // input a nonpositive integer if you want to specify
                        // the XML input file name.
  nx = nx*(int)sqrt((double)Comm.NumProc());
  int ny = nx;

  printf("nx = %d\nny = %d\n",nx,ny);
  fflush(stdout);

  char xmlFile[80];
  bool readXML=false;
  if (argc > 2) {strcpy(xmlFile,argv[2]); readXML = true;}
  else sprintf(xmlFile,"%s","params.xml");

  ParameterList GaleriList;
  GaleriList.set("nx", nx);
  GaleriList.set("ny", ny);

#ifdef ML_SCALING
  timeVec[probBuild].value = MPI_Wtime();
#endif
  Epetra_Map* Map = CreateMap("Cartesian2D", Comm, GaleriList);
  Epetra_CrsMatrix* A = CreateCrsMatrix("Laplace2D", Map, GaleriList);

  if (!Comm.MyPID()) printf("nx = %d, ny = %d, mx = %d, my = %d\n",nx,ny,GaleriList.get("mx",-1),GaleriList.get("my",-1));
  fflush(stdout);
  //avoid potential overflow
  double numMyRows = A->NumMyRows();
  double numGlobalRows;
  Comm.SumAll(&numMyRows,&numGlobalRows,1);
  if (!Comm.MyPID()) printf("# global rows = %1.0f\n",numGlobalRows);
  //printf("pid %d: #rows = %d\n",Comm.MyPID(),A->NumMyRows());
  fflush(stdout);

  Epetra_MultiVector *coords = CreateCartesianCoordinates("2D", Map,GaleriList);
  double *x_coord=0,*y_coord=0,*z_coord=0;
  double **ttt;
  if (!coords->ExtractView(&ttt)) {
    x_coord = ttt[0];
    y_coord = ttt[1];
  } else {
    if (!Comm.MyPID()) printf("Error extracting coordinate vectors\n");
    MPI_Finalize();
    exit(EXIT_FAILURE);
  }

  Epetra_Vector LHS(*Map); LHS.Random();
  Epetra_Vector RHS(*Map); RHS.PutScalar(0.0);
  Epetra_LinearProblem Problem(A, &LHS, &RHS);
  AztecOO solver(Problem);

#ifdef ML_SCALING
  timeVec[probBuild].value = MPI_Wtime() - timeVec[probBuild].value;
#endif

  // =========================== begin of ML part ===========================

#ifdef ML_SCALING
  timeVec[precBuild].value = MPI_Wtime();
#endif
  ParameterList MLList;

  if (readXML) {
    MLList.set("read XML",true);
    MLList.set("XML input file",xmlFile);
  }
  else {
    cout << "here" << endl;
    ML_Epetra::SetDefaults("SA",MLList);
    MLList.set("smoother: type","Chebyshev");
    MLList.set("smoother: sweeps",3);
    MLList.set("coarse: max size",1);
  }

  MLList.set("x-coordinates", x_coord);
  MLList.set("y-coordinates", y_coord);
  MLList.set("z-coordinates", z_coord);

/*
RCP<std::vector<int> >
   m_smootherAztecOptions = rcp(new std::vector<int>(AZ_OPTIONS_SIZE));
RCP<std::vector<double> >
   m_smootherAztecParams = rcp(new std::vector<double>(AZ_PARAMS_SIZE));
//int             m_smootherAztecOptions[AZ_OPTIONS_SIZE];
//double          m_smootherAztecParams[AZ_PARAMS_SIZE];

std::string smootherType("Aztec");
AZ_defaults(&(*m_smootherAztecOptions)[0],&(*m_smootherAztecParams)[0]);
(*m_smootherAztecOptions)[AZ_precond]         = AZ_dom_decomp;
(*m_smootherAztecOptions)[AZ_subdomain_solve] = AZ_icc;
bool smootherAztecAsSolver = true;
*/
  MLList.set("ML output",10);

  MLList.set("repartition: enable",1);
  MLList.set("repartition: max min ratio",1.3);
  MLList.set("repartition: min per proc",200);
  MLList.set("repartition: partitioner","Zoltan");
  MLList.set("repartition: Zoltan dimensions",2);
  MLList.set("repartition: put on single proc",1);
  MLList.set("repartition: Zoltan type","hypergraph");
  MLList.set("repartition: estimated iterations",13);

/*
MLList.set("smoother: Aztec options",m_smootherAztecOptions);
MLList.set("smoother: Aztec params",m_smootherAztecParams);
MLList.set("smoother: type",smootherType.c_str());
MLList.set("smoother: Aztec as solver", smootherAztecAsSolver);

MLList.set("ML print initial list", 0);
MLList.set("ML print final list", 0);
*/

  ML_Epetra::MultiLevelPreconditioner* MLPrec =
    new ML_Epetra::MultiLevelPreconditioner(*A, MLList);

  // verify unused parameters on process 0 (put -1 to print on all
  // processes)
  MLPrec->PrintUnused(0);
#ifdef ML_SCALING
  timeVec[precBuild].value = MPI_Wtime() - timeVec[precBuild].value;
#endif

  // =========================== end of ML part =============================

#ifdef ML_SCALING
  timeVec[solve].value = MPI_Wtime();
#endif
  solver.SetPrecOperator(MLPrec);
  solver.SetAztecOption(AZ_solver, AZ_cg);
  solver.SetAztecOption(AZ_output, 1);
  solver.Iterate(500, 1e-12);
#ifdef ML_SCALING
  timeVec[solve].value = MPI_Wtime() - timeVec[solve].value;
#endif

  // destroy the preconditioner
  delete MLPrec;

  // compute the real residual

  double residual;
  LHS.Norm2(&residual);

  if( Comm.MyPID()==0 ) {
    cout << "||b-Ax||_2 = " << residual << endl;
  }

  // for testing purposes
  if (residual > 1e-5)
    exit(EXIT_FAILURE);

  delete A;
  delete Map;
  delete coords;

#ifdef ML_SCALING
  timeVec[total].value = MPI_Wtime() - timeVec[total].value;

  //avg
  double dupTime[ntimers],avgTime[ntimers];
  for (int i=0; i<ntimers; i++) dupTime[i] = timeVec[i].value;
  MPI_Reduce(dupTime,avgTime,ntimers,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
  for (int i=0; i<ntimers; i++) avgTime[i] = avgTime[i]/Comm.NumProc();
  //min
  MPI_Reduce(timeVec,minTime,ntimers,MPI_DOUBLE_INT,MPI_MINLOC,0,MPI_COMM_WORLD);
  //max
  MPI_Reduce(timeVec,maxTime,ntimers,MPI_DOUBLE_INT,MPI_MAXLOC,0,MPI_COMM_WORLD);

  if (Comm.MyPID() == 0) {
    printf("timing :  max (pid)  min (pid)  avg\n");
    printf("Problem build         :   %2.3e (%d)  %2.3e (%d)  %2.3e \n",
             maxTime[probBuild].value,maxTime[probBuild].rank,
             minTime[probBuild].value,minTime[probBuild].rank,
             avgTime[probBuild]);
    printf("Preconditioner build  :   %2.3e (%d)  %2.3e (%d)  %2.3e \n",
             maxTime[precBuild].value,maxTime[precBuild].rank,
             minTime[precBuild].value,minTime[precBuild].rank,
             avgTime[precBuild]);
    printf("Solve                 :   %2.3e (%d)  %2.3e (%d)  %2.3e \n",
             maxTime[solve].value,maxTime[solve].rank,
             minTime[solve].value,minTime[solve].rank,
             avgTime[solve]);
    printf("Total                 :   %2.3e (%d)  %2.3e (%d)  %2.3e \n",
             maxTime[total].value,maxTime[total].rank,
             minTime[total].value,minTime[total].rank,
             avgTime[total]);
  }
#endif

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return(EXIT_SUCCESS);
}
//==============================================================================
// this requires Epetra_CrsMatrix + OptimizeStorage()
int Ifpack_PointRelaxation::
ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int* IndexOffset;
  int* Indices;
  double* Values;
  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));

  int NumVectors = X.NumVectors();

  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_) {
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  }
  else
    Y2 = Teuchos::rcp( &Y, false );

  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);
  
  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {
    
    // only one data exchange per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    for (int i = 0 ; i < NumMyRows_ ; ++i) {

      int col;
      double diag = d_ptr[i];

      // no need to extract row i
      //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));

      for (int m = 0 ; m < NumVectors ; ++m) {

        double dtemp = 0.0;

        for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {

          col = Indices[k];
          dtemp += Values[k] * y2_ptr[m][col];
        }

        y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
      }
    }

    for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {

      int col;
      double diag = d_ptr[i];

      // no need to extract row i
      //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices));

      for (int m = 0 ; m < NumVectors ; ++m) {

        double dtemp = 0.0;
        for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {

          col = Indices[k];
          dtemp += Values[k] * y2_ptr[m][col];
        }

        y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;

      }
    }

    if (IsParallel_)
      for (int m = 0 ; m < NumVectors ; ++m) 
        for (int i = 0 ; i < NumMyRows_ ; ++i)
          y_ptr[m][i] = y2_ptr[m][i];
  }

  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
  return(0);
}
Esempio n. 16
0
int main(int argc, char *argv[])
{

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int nx;
  if (argc > 1)
    nx = (int) strtol(argv[1],NULL,10);
  else
    nx = 256;
  int ny = nx * Comm.NumProc(); // each subdomain is a square

  ParameterList GaleriList;
  GaleriList.set("nx", nx);
  GaleriList.set("ny", ny);
  GaleriList.set("mx", 1);
  GaleriList.set("my", Comm.NumProc());

  int NumNodes = nx*ny;
  int NumPDEEqns = 2;

  Epetra_Map* Map = CreateMap("Cartesian2D", Comm, GaleriList);
  Epetra_CrsMatrix* CrsA = CreateCrsMatrix("Laplace2D", Map, GaleriList);
  Epetra_VbrMatrix* A = CreateVbrMatrix(CrsA, NumPDEEqns);

  Epetra_Vector LHS(A->DomainMap()); LHS.PutScalar(0);
  Epetra_Vector RHS(A->DomainMap()); RHS.Random();
  Epetra_LinearProblem Problem(A, &LHS, &RHS);
  AztecOO solver(Problem);
  double *x_coord = 0, *y_coord = 0, *z_coord = 0;

  Epetra_MultiVector *coords = CreateCartesianCoordinates("2D", &(CrsA->Map()),
                                                          GaleriList);

  double **ttt;
  if (!coords->ExtractView(&ttt)) {
    x_coord = ttt[0];
    y_coord = ttt[1];
  } else {
    printf("Error extracting coordinate vectors\n");
#   ifdef HAVE_MPI
    MPI_Finalize() ;
#   endif
    exit(EXIT_FAILURE);
  }

  ParameterList MLList;
  SetDefaults("SA",MLList);
  MLList.set("ML output",10);
  MLList.set("max levels",10);
  MLList.set("increasing or decreasing","increasing");
  MLList.set("smoother: type", "Chebyshev");
  MLList.set("smoother: sweeps", 3);

  // *) if a low number, it will use all the available processes
  // *) if a big number, it will use only processor 0 on the next level
  MLList.set("aggregation: next-level aggregates per process", 1);

  MLList.set("aggregation: type (level 0)", "Zoltan");
  MLList.set("aggregation: type (level 1)", "Uncoupled");
  MLList.set("aggregation: type (level 2)", "Zoltan");
  MLList.set("aggregation: smoothing sweeps", 2);

  MLList.set("x-coordinates", x_coord);
  MLList.set("y-coordinates", y_coord);
  MLList.set("z-coordinates", z_coord);

  // specify the reduction with respect to the previous level
  // (very small values can break the code)
  int ratio = 16;
  MLList.set("aggregation: global aggregates (level 0)",
             NumNodes / ratio);
  MLList.set("aggregation: global aggregates (level 1)",
             NumNodes / (ratio * ratio));
  MLList.set("aggregation: global aggregates (level 2)",
             NumNodes / (ratio * ratio * ratio));

  MultiLevelPreconditioner* MLPrec =
    new MultiLevelPreconditioner(*A, MLList, true);

  solver.SetPrecOperator(MLPrec);
  solver.SetAztecOption(AZ_solver, AZ_cg_condnum);
  solver.SetAztecOption(AZ_output, 1);
  solver.Iterate(100, 1e-12);

  // compute the real residual
  Epetra_Vector Residual(A->DomainMap());
  //1.0 * RHS + 0.0 * RHS - 1.0 * (A * LHS)
  A->Apply(LHS,Residual);
  Residual.Update(1.0, RHS, 0.0, RHS, -1.0);
  double rn;
  Residual.Norm2(&rn);

  if (Comm.MyPID() == 0 )
    std::cout << "||b-Ax||_2 = " << rn << endl;

  if (Comm.MyPID() == 0 && rn > 1e-5) {
    std::cout << "TEST FAILED!!!!" << endl;
#   ifdef HAVE_MPI
    MPI_Finalize() ;
#   endif
    exit(EXIT_FAILURE);
  }

  delete MLPrec;
  delete coords;
  delete Map;
  delete CrsA;
  delete A;

  if (Comm.MyPID() == 0)
    std::cout << "TEST PASSED" << endl;

#ifdef HAVE_MPI
  MPI_Finalize() ;
#endif

  exit(EXIT_SUCCESS);

}
Esempio n. 17
0
  int  BuildMatrixTests (Epetra_MultiVector & C,
		       const char TransA, const char TransB, 
		       const double alpha, 
		       Epetra_MultiVector& A, 
		       Epetra_MultiVector& B,
		       const double beta,
		       Epetra_MultiVector& C_GEMM ) {

    // For given values of TransA, TransB, alpha and beta, a (possibly
    // zero) filled Epetra_MultiVector C, and allocated
    // Epetra_MultiVectors A, B and C_GEMM this routine will generate values for 
    // Epetra_MultiVectors A, B and C_GEMM such that, if A, B and (this) are 
    // used with GEMM in this class, the results should match the results 
    // generated by this routine.

    // Test for Strided multivectors (required for GEMM ops)

    if (!A.ConstantStride()   ||
	!B.ConstantStride()      ||
	!C_GEMM.ConstantStride() ||
	!C.ConstantStride()) return(-1); // Error 

    int i, j;
    double fi, fj;  // Used for casting loop variables to floats

    // Get a view of the MultiVectors

    double *Ap      = 0;
    double *Bp      = 0;
    double *Cp      = 0;
    double *C_GEMMp = 0;

    int A_nrows = A.MyLength();
    int A_ncols = A.NumVectors();
    int B_nrows = B.MyLength();
    int B_ncols = B.NumVectors();
    int C_nrows = C.MyLength();
    int C_ncols = C.NumVectors();
    int A_Stride         = 0;
    int B_Stride         = 0;
    int C_Stride         = 0;
    int C_GEMM_Stride    = 0;

    A.ExtractView(&Ap, &A_Stride);
    B.ExtractView(&Bp, &B_Stride);
    C.ExtractView(&Cp, &C_Stride);
    C_GEMM.ExtractView(&C_GEMMp, &C_GEMM_Stride);

      // Define some useful constants


    int opA_ncols = (TransA=='N') ? A.NumVectors() : A.MyLength();
    int opB_nrows = (TransB=='N') ? B.MyLength() : B.NumVectors();
  
    int C_global_inner_dim  = (TransA=='N') ? A.NumVectors() : A.GlobalLength();
  

    bool A_is_local = (!A.DistributedGlobal());
    bool B_is_local = (!B.DistributedGlobal());
    bool C_is_local = (!C.DistributedGlobal());

    int A_IndexBase = A.Map().IndexBase();
    int B_IndexBase = B.Map().IndexBase();
  
    // Build two new maps that we can use for defining global equation indices below
    Epetra_Map * A_Map = new Epetra_Map(-1, A_nrows, A_IndexBase, A.Map().Comm());
    Epetra_Map * B_Map = new Epetra_Map(-1, B_nrows, B_IndexBase, B.Map().Comm());

    int* A_MyGlobalElements = new int[A_nrows];
    A_Map->MyGlobalElements(A_MyGlobalElements);
    int* B_MyGlobalElements = new int[B_nrows];
    B_Map->MyGlobalElements(B_MyGlobalElements);

  // Check for compatible dimensions

    if (C.MyLength()        != C_nrows     ||
	opA_ncols      != opB_nrows   ||
	C.NumVectors()    != C_ncols     ||
	C.MyLength()        != C_GEMM.MyLength()        ||
	C.NumVectors()    != C_GEMM.NumVectors()      ) {
      delete A_Map;
      delete B_Map;
      delete [] A_MyGlobalElements;
      delete [] B_MyGlobalElements;
      return(-2); // Return error
    }

    bool Case1 = ( A_is_local &&  B_is_local &&  C_is_local);  // Case 1 above
    bool Case2 = (!A_is_local && !B_is_local &&  C_is_local && TransA=='T' );// Case 2
    bool Case3 = (!A_is_local &&  B_is_local && !C_is_local && TransA=='N');// Case 3
  
    // Test for meaningful cases

    if (!(Case1 || Case2 || Case3)) {
      delete A_Map;
      delete B_Map;
      delete [] A_MyGlobalElements;
      delete [] B_MyGlobalElements;
      return(-3); // Meaningless case
    }

    /* Fill A, B and C with values as follows:

       If A_is_local is false:
       A(i,j) = A_MyGlobalElements[i]*j, i=1,...,numLocalEquations, j=1,...,NumVectors
       else
       A(i,j) = i*j,     i=1,...,numLocalEquations, j=1,...,NumVectors
       
       If B_is_local is false:
       B(i,j) = 1/(A_MyGlobalElements[i]*j), i=1,...,numLocalEquations, j=1,...,NumVectors
       
       else
       B(i,j) = 1/(i*j), i=1,...,numLocalEquations, j=1,...,NumVectors
       
       In addition, scale each entry by GlobalLength for A and
       1/GlobalLength for B--keeps the magnitude of entries in check
  
     
       C_GEMM will depend on A_is_local and B_is_local.  Three cases:
       
       1) A_is_local true and B_is_local true:
       C_GEMM will be local replicated and equal to A*B = i*NumVectors/j
       
       2) A_is_local false and B_is_local false
       C_GEMM will be local replicated = A(trans)*B(i,j) = i*numGlobalEquations/j
       
       3) A_is_local false B_is_local true
       C_GEMM will distributed global and equals A*B = A_MyGlobalElements[i]*NumVectors/j
       
    */

    // Define a scalar to keep magnitude of entries reasonable

    double sf = C_global_inner_dim;
    double sfinv = 1.0/sf;

    // Define A depending on A_is_local

    if (A_is_local)
      {
	for (j = 0; j <A_ncols ; j++) 
	  for (i = 0; i<A_nrows; i++)
	    { 
	      fi = i+1; // Get float version of i and j, offset by 1.
	      fj = j+1;
	      Ap[i + A_Stride*j] = (fi*sfinv)*fj;
	    }
      }
    else
      {
	for (j = 0; j <A_ncols ; j++) 
	  for (i = 0; i<A_nrows; i++)
	    { 
	      fi = A_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1.
	      fj = j+1;
	      Ap[i + A_Stride*j] = (fi*sfinv)*fj;
	    }
      }
    
    // Define B depending on TransB and B_is_local
  
    if (B_is_local)
      {
	for (j = 0; j <B_ncols ; j++) 
	  for (i = 0; i<B_nrows; i++)
	    { 
	      fi = i+1; // Get float version of i and j, offset by 1.
	      fj = j+1;
	      Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj);
	    }
      }
    else
      {
	for (j = 0; j <B_ncols ; j++) 
	  for (i = 0; i<B_nrows; i++)
	    { 
	      fi = B_MyGlobalElements[i]+1; // Get float version of i and j, offset by 1.
	      fj = j+1;
	      Bp[i + B_Stride*j] = 1.0/((fi*sfinv)*fj);
	    }
      }
    // Define C_GEMM depending on A_is_local and B_is_local.  C_GEMM is also a
    // function of alpha, beta, TransA, TransB: 
    
    //       C_GEMM = alpha*A(TransA)*B(TransB) + beta*C_GEMM
    
    if (Case1)
      {
	for (j = 0; j <C_ncols ; j++) 
	  for (i = 0; i<C_nrows; i++)
	    { 
	      // Get float version of i and j, offset by 1.
	      fi = (i+1)*C_global_inner_dim;
	      fj = j+1;
	      C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj)
		+ beta * Cp[i + C_Stride*j];
	    }
      }
    else if (Case2)
      {
	for (j = 0; j <C_ncols ; j++)
	  for (i = 0; i<C_nrows; i++)
	    { 
	      // Get float version of i and j, offset by 1.
	      fi = (i+1)*C_global_inner_dim;
	      fj = j+1;
	      C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj)
		+ beta * Cp[i + C_Stride*j];
	    }  
      }
    else
      {
	for (j = 0; j <C_ncols ; j++) 
	  for (i = 0; i<C_nrows; i++)
	    { 
	      // Get float version of i and j.
	      fi = (A_MyGlobalElements[i]+1)*C_global_inner_dim;
	      fj = j+1;
	      C_GEMMp[i + C_GEMM_Stride*j] = alpha * (fi/fj)
		+ beta * Cp[i + C_Stride*j];
	    }  
      }
    delete A_Map;
    delete B_Map;
    delete [] A_MyGlobalElements;
    delete [] B_MyGlobalElements;

    return(0);
  }
int Ifpack_PointRelaxation::
ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int* IndexOffset;
  int* Indices;
  double* Values;
  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));

  int NumVectors = X.NumVectors();

  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_) {
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  }
  else
    Y2 = Teuchos::rcp( &Y, false );

  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {

    // only one data exchange per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    if(!DoBackwardGS_){
      /* Forward Mode */
      for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
        int i=LocalSmoothingIndices_[ii];

        int col;
        double diag = d_ptr[i];

        for (int m = 0 ; m < NumVectors ; ++m) {

          double dtemp = 0.0;

          for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y2_ptr[m][col];
          }

          y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
        }
      }
    }
    else {
      /* Backward Mode */
      for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
        int i=LocalSmoothingIndices_[ii];

        int col;
        double diag = d_ptr[i];

        for (int m = 0 ; m < NumVectors ; ++m) {

          double dtemp = 0.0;
          for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y2_ptr[m][col];
          }

          y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;

        }
      }
    }


    if (IsParallel_)
      for (int m = 0 ; m < NumVectors ; ++m)
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          int i=LocalSmoothingIndices_[ii];
          y_ptr[m][i] = y2_ptr[m][i];
        }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
#endif
  return(0);
} //ApplyInverseGS_LocalFastCrsMatrix()
int 
LOCA::Epetra::AugmentedOp::ApplyInverse(const Epetra_MultiVector& Input, 
					Epetra_MultiVector& Result) const
{

  // Get number of vectors
  int n = Input.NumVectors();

  // Check num vectors is correct
  if (importedInput == NULL || n != importedInput->NumVectors())
    globalData->locaErrorCheck->throwError(
				 "LOCA::Epetra::AugmentedOp::ApplyInverse()"
				 "Must call init() before ApplyInverse()!");

  // Import parameter components
  importedInput->Import(Input, *extendedImporter, Insert);

  // get views
  double **input_view;
  double **result_view;
  importedInput->ExtractView(&input_view);
  Result.ExtractView(&result_view);

  // get views of paramter components
  // this is done by setting the pointer for each column to start at
  // underlyingLength
  double **input_view_y = new double*[n];
  for (int i=0; i<n; i++)
    input_view_y[i] = input_view[i]+underlyingLength;

  // break input, result into components
  Epetra_MultiVector input_x(View, underlyingMap, input_view, n);
  Epetra_MultiVector input_y(View, localMap, input_view_y, n);
  Epetra_MultiVector result_x(View, underlyingMap, result_view, n);

  // copy input_y into result_y
  result_y->Scale(1.0, input_y);

  // Temporary epetra vectors
  Epetra_MultiVector tmp2(c);
  tmp->PutScalar(0.0);
 
  // Solve J*result_x = input_x 
  jacOperator->ApplyInverse(input_x, result_x);

  if (useTranspose) {

    // Solve J*tmp = b
    jacOperator->ApplyInverse(*b, *tmp);

    // Compute input_y - a^T*result_x
    result_y->Multiply('T', 'N', -1.0, *a, result_x, 1.0);

    // Compute c - a^T*tmp
    tmp2.Multiply('T', 'N', -1.0, *a, *tmp, 1.0);

  }
  else {

    // Solve J*tmp = a
    jacOperator->ApplyInverse(*a, *tmp);

    // Compute input_y - b^T*result_x
    result_y->Multiply('T', 'N', -1.0, *b, result_x, 1.0);

    // Compute c - b^T*tmp1
    tmp2.Multiply('T', 'N', -1.0, *b, *tmp, 1.0);

  }

  // Solve (c - z^T*tmp1)^-1 * (input_y - z^T*result_x)  where z = a or b
  int *ipiv = new int[numConstraints];
  int info;
  double *result_y_view; 
  int result_y_lda;
  result_y->ExtractView(&result_y_view, &result_y_lda);
  double *tmp2_view;
  int tmp2_lda;
  tmp2.ExtractView(&tmp2_view, &tmp2_lda);
  dlapack.GESV(numConstraints, n, tmp2_view, tmp2_lda, ipiv, result_y_view, 
	       result_y_lda, &info);
  delete [] ipiv;
  if (info != 0) {
    globalData->locaErrorCheck->throwError(
				 "LOCA::Epetra::AugmentedOp::ApplyInverse()"
				 "Solve of dense matrix failed!");
  }
  
  // Compute result_x = result_x - tmp*result_y
  result_x.Multiply('N', 'N', -1.0, *tmp, *result_y, 1.0);

  // Set parameter component
  if (haveParamComponent)
    for (int j=0; j<n; j++)
      for (int i=0; i<numConstraints; i++)
	result_view[j][underlyingLength+i] = (*result_y)[j][i];

  delete [] input_view_y;

  return 0;
}
//==============================================================================
int Ifpack_PointRelaxation::
ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int NumVectors = X.NumVectors();
  int Length = Matrix().MaxNumEntries();
  std::vector<int> Indices(Length);
  std::vector<double> Values(Length);

  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_) {
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  }
  else
    Y2 = Teuchos::rcp( &Y, false );

  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {

    // only one data exchange per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
      int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

      int NumEntries;
      int col;
      double diag = d_ptr[i];

      IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                               &Values[0], &Indices[0]));

      for (int m = 0 ; m < NumVectors ; ++m) {

        double dtemp = 0.0;

        for (int k = 0 ; k < NumEntries ; ++k) {

          col = Indices[k];
          dtemp += Values[k] * y2_ptr[m][col];
        }

        y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
      }
    }
    for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
      int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

      int NumEntries;
      int col;
      double diag = d_ptr[i];

      IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                               &Values[0], &Indices[0]));

      for (int m = 0 ; m < NumVectors ; ++m) {

        double dtemp = 0.0;
        for (int k = 0 ; k < NumEntries ; ++k) {

          col = Indices[k];
          dtemp += Values[k] * y2_ptr[m][col];
        }

        y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;

      }
    }

    if (IsParallel_)
      for (int m = 0 ; m < NumVectors ; ++m)
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
          y_ptr[m][i] = y2_ptr[m][i];
        }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
#endif
  return(0);
}
Esempio n. 21
0
// ================================================ ====== ==== ==== == =
// Apply the preconditioner w/ RHS B and get result X
int ML_Epetra::LevelWrap::ApplyInverse(const Epetra_MultiVector& B, Epetra_MultiVector& X_) const{
#ifdef ML_TIMING
  double t_time,t_diff;
  StartTimer(&t_time);
#endif

  // Sanity Checks
  if (!B.Map().SameAs(OperatorDomainMap())) return -1;
  if (!X_.Map().SameAs(OperatorRangeMap())) return -1;
  if (!X_.Map().SameAs(B.Map())) return -1;
  if (B.NumVectors() != X_.NumVectors()) return -1;

  // Build new work vector X
  Epetra_MultiVector X(X_.Map(),X_.NumVectors(),true);
  Epetra_MultiVector tmp0(X_.Map(),X_.NumVectors(),true);
  Epetra_MultiVector tmp1(P0_->DomainMap(),X_.NumVectors(),true);
  Epetra_MultiVector tmp2(P0_->DomainMap(),X_.NumVectors(),true);

  double *Bptr, *Xptr;
  int    BLDA, XLDA;

  // Pre Smoother
  if(pre_or_post==ML_BOTH || pre_or_post==ML_PRESMOOTHER){
    if (use_mlsmoother_) {   // ML smoothing is experimental 
      B.ExtractView(&Bptr,&BLDA);
      X.ExtractView(&Xptr,&XLDA);
      ML_Smoother_Apply(&(ml_subproblem_->pre_smoother[0]),
                        X.MyLength(), Xptr,
                        X.MyLength(), Bptr, ML_ZERO);
    }
    else Smoother_->ApplyInverse(B,X);

    A0_->Apply(X,tmp0);
    tmp0.Update(1.0,B,-1.0);
  }
  else  tmp0 = B;

  // Form coarse residual
  if(use_pt_) P0_->Multiply(true,tmp0,tmp1);
  else R0_->Multiply(false,tmp0,tmp1);

  // Solve coarse problem
  A1prec_->ApplyInverse(tmp1,tmp2);

  // Update solution
  P0_->Multiply(false,tmp2,tmp0);
  X.Update(1.0,tmp0,1.0);

  // Post Smoother
  if(pre_or_post==ML_BOTH || pre_or_post==ML_POSTSMOOTHER){
    if (use_mlsmoother_) {   // ML smoothing is experimental 
      Epetra_MultiVector tmp3(X_.Map(),X_.NumVectors(),true);

      A0_->Apply(X,tmp0);
      tmp0.Update(1.0,B,-1.0);
      tmp3.PutScalar(0.0);
      tmp0.ExtractView(&Bptr,&BLDA);
      tmp3.ExtractView(&Xptr,&XLDA);

      ML_Smoother_Apply(&(ml_subproblem_->pre_smoother[0]),X.MyLength(),Xptr,
                        X.MyLength(), Bptr, ML_ZERO);
      X.Update(1.0,tmp3,1.0);
    }
    else Smoother_->ApplyInverse(B,X);
  }

  // Copy to output
  X_=X;

#ifdef ML_TIMING
  StopTimer(&t_time,&t_diff);
  /* Output */
  ML_Comm *comm_;
  ML_Comm_Create(&comm_);
  ApplicationTime_+= t_diff;
  if(FirstApplication_){
    FirstApplication_=false;
    FirstApplicationTime_=ApplicationTime_;
  }/*end if*/
  ML_Comm_Destroy(&comm_);
#endif

  return 0;
}