void ISVDMultiCD::makePass() {
    Epetra_LAPACK lapack;
    Epetra_BLAS   blas;

    bool firstPass = (curRank_ == 0);
    const int numCols = A_->NumVectors();
    TEUCHOS_TEST_FOR_EXCEPTION( !firstPass && (numProc_ != numCols), std::logic_error,
        "RBGen::ISVDMultiCD::makePass(): after first pass, numProc should be numCols");

    // compute W = I - Z T Z^T from current V_
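    // (compact WY form: if V_ = Q R is the Householder QR of the current right
    //  basis, Q is the block reflector W = I - Z T Z^T, where Z holds the unit
    //  lower-trapezoidal Householder vectors from GEQRF and T is the upper
    //  triangular factor from LARFT; W is never formed explicitly, only Z, T
    //  and A Z T are stored below)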
    Teuchos::RCP<Epetra_MultiVector> lclAZT, lclZ;
    double *Z_A, *AZT_A;
    int Z_LDA, AZT_LDA;
    int oldRank = 0;
    double Rerr = 0.0;
    if (!firstPass) {
      // copy V_ into workZ_
      lclAZT = Teuchos::rcp( new Epetra_MultiVector(::View,*workAZT_,0,curRank_) );
      lclZ   = Teuchos::rcp( new Epetra_MultiVector(::View,*workZ_,0,curRank_) );
      {
        Epetra_MultiVector lclV(::View,*V_,0,curRank_);
        *lclZ = lclV;
      }
      // compute the Householder QR factorization of the current right basis
      // Vhat = W*R
      int info, lwork = curRank_;
      std::vector<double> tau(curRank_), work(lwork);
      info = lclZ->ExtractView(&Z_A,&Z_LDA);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector Z.");
      lapack.GEQRF(numCols,curRank_,Z_A,Z_LDA,&tau[0],&work[0],lwork,&info);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): error calling GEQRF on current right basis while constructing next pass coefficients.");
      if (debug_) {
        // we just took the QR factorization of a set of orthonormal vectors;
        // the R factor should be diagonal, with unit-magnitude elements (\pm 1)
        // check it
        Rerr = 0.0;
        for (int j=0; j<curRank_; j++) {
          for (int i=0; i<j; i++) {
            Rerr += std::abs(Z_A[j*Z_LDA+i]);
          }
          Rerr += std::abs(std::abs(Z_A[j*Z_LDA+j]) - 1.0);
        }
      }
      // compute the block representation
      // W = I - Z T Z^T
      lapack.LARFT('F','C',numCols,curRank_,Z_A,Z_LDA,&tau[0],workT_->A(),workT_->LDA());
      // LARFT leaves the upper triangular block of Z unchanged
      // note: it currently contains the R factor of V_, which is very close to
      //   diag(\pm 1, ..., \pm 1)
      //
      // we need to set it to:
      //   [1 0 0 ... 0]
      //   [  1 0 ... 0]
      //   [   ....    ]
      //   [          1]
      //
      // see documentation for LARFT
      //
      for (int j=0; j<curRank_; j++) {
        Z_A[j*Z_LDA+j] = 1.0;
        for (int i=0; i<j; i++) {
          Z_A[j*Z_LDA+i] = 0.0;
        }
      }
      // compute part of A W:  A Z T
      // put this in workAZT_
      // first, A Z
      info = lclAZT->Multiply('N','N',1.0,*A_,*lclZ,0.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for A*Z");
      // second, (A Z) T (in situ, as T is upper triangular)
      info = lclAZT->ExtractView(&AZT_A,&AZT_LDA);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector AZ.");
      blas.TRMM('R','U','N','N',numCols,curRank_,1.0,workT_->A(),workT_->LDA(),AZT_A,AZT_LDA);
      // save oldRank: it tells us the width of Z
      oldRank  = curRank_;

      curRank_ = 0;
      numProc_ = 0;
    }
    else { // firstPass == true
      curRank_ = 0;
      numProc_ = 0;
    }

    while (numProc_ < numCols) {
      //
      // determine lup
      //
      // want lup >= lmin
      //      lup <= lmax
      // need lup <= numCols - numProc
      //      lup <= maxBasisSize - curRank
      //
      int lup;
      if (curRank_ == 0) {
        // first step uses startRank_
        // this is not affected by lmin,lmax
        lup = startRank_;
      }
      else {
        // this value minimizes overall complexity, assuming fixed rank
        lup = (int)(curRank_ / Teuchos::ScalarTraits<double>::squareroot(2.0));
        // constrain to [lmin,lmax]
        lup = (lup < lmin_ ? lmin_ : lup);
        lup = (lup > lmax_ ? lmax_ : lup);
      }
      //
      // now cap lup via maxBasisSize and the available data
      // these caps apply to all lup, as a result of memory and data constraints
      //
      // available data
      lup = (lup > numCols - numProc_ ? numCols - numProc_ : lup);
      // available memory
      lup = (lup > maxBasisSize_ - curRank_ ? maxBasisSize_ - curRank_ : lup);
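      // (for illustration, with hypothetical values curRank_ = 20, lmin_ = 2,
      //  lmax_ = 40, maxBasisSize_ = 50 and 30 unprocessed columns:
      //  lup = floor(20/sqrt(2)) = 14, unchanged by the clamps and caps)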

      // get view of new vectors
      {
        const Epetra_MultiVector Aplus(::View,*A_,numProc_,lup);
        Epetra_MultiVector        Unew(::View,*U_,curRank_,lup);
        // put them in U
        if (firstPass) {
          // new vectors are just Aplus
          Unew = Aplus;
        }
        else {
          // new vectors are Aplus - (A Z T) Z_i^T
          // specifically, Aplus - (A Z T) Z(numProc:numProc+lup-1,1:oldRank)^T
          Epetra_LocalMap lclmap(lup,0,A_->Comm());
          Epetra_MultiVector Zi(::View,lclmap,&Z_A[numProc_],Z_LDA,oldRank);
          Unew = Aplus;
          int info = Unew.Multiply('N','T',-1.0,*lclAZT,Zi,1.0);
          TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
              "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for A*Wi");
        }
      }

      // perform the incremental step
      incStep(lup);
    }

    // compute W V = V - Z T Z^T V
    // Z^T V is oldRank x curRank
    // T Z^T V is oldRank x curRank
    // we need T Z^T V in a local Epetra_MultiVector
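    // (this pass processed the columns of A W, so that A W ~= U S V^T and
    //  hence A ~= U S (W V)^T; applying W to V therefore yields the right
    //  basis of A itself, built right-to-left from small oldRank-sized products)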
    if (!firstPass) {
      Teuchos::RCP<Epetra_MultiVector> lclV;
      double *TZTV_A;
      int TZTV_LDA;
      int info;
      Epetra_LocalMap lclmap(oldRank,0,A_->Comm());
      // get pointer to current V
      lclV = Teuchos::rcp( new Epetra_MultiVector(::View,*V_,0,curRank_) );
      // create space for T Z^T V
      Epetra_MultiVector TZTV(lclmap,curRank_,false);
      // multiply Z^T V
      info = TZTV.Multiply('T','N',1.0,*lclZ,*lclV,0.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for Z^T V.");
      // get pointer to data in Z^T V
      info = TZTV.ExtractView(&TZTV_A,&TZTV_LDA);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector TZTV.");
      // multiply T (Z^T V)
      blas.TRMM('L','U','N','N',oldRank,curRank_,1.0,workT_->A(),workT_->LDA(),TZTV_A,TZTV_LDA);
      // multiply V - Z (T Z^T V)
      info = lclV->Multiply('N','N',-1.0,*lclZ,TZTV,1.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for W V.");
    }

    //
    // compute the new residuals
    // we know that A V = U S
    // if, in addition, A^T U = V S, then we have the singular subspaces
    // check the residuals A^T U - V S, scaling the i-th column by 1/sigma[i]
    //
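    // (i.e. the reported residuals are res_i = || A^T u_i - sigma_i v_i ||_2 / sigma_i,
    //  which vanish exactly when (sigma_i, u_i, v_i) is a singular triplet of A)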
    {
      // make these static, because makePass() will likely be called again
      static Epetra_LocalMap lclmap(A_->NumVectors(),0,A_->Comm());
      static Epetra_MultiVector ATU(lclmap,maxBasisSize_,false);

      Epetra_MultiVector ATUlcl(::View,ATU,0,curRank_);
      Epetra_MultiVector Ulcl(::View,*U_,0,curRank_);
      Epetra_MultiVector Vlcl(::View,*V_,0,curRank_);
      // compute A^T U
      int info = ATUlcl.Multiply('T','N',1.0,*A_,Ulcl,0.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply for A^T U.");
      Epetra_LocalMap rankmap(curRank_,0,A_->Comm());
      Epetra_MultiVector S(rankmap,curRank_,true);
      for (int i=0; i<curRank_; i++) {
        S[i][i] = sigma_[i];
      }
      // subtract V S from A^T U
      info = ATUlcl.Multiply('N','N',-1.0,Vlcl,S,1.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::computeBasis(): Error calling Epetra_MultiVector::Multiply for V S.");
      resNorms_.resize(curRank_);
      ATUlcl.Norm2(&resNorms_[0]);
      // scale by sigmas
      for (int i=0; i<curRank_; i++) {
        if (sigma_[i] != 0.0) {
          resNorms_[i] /= sigma_[i];
        }
      }
    }

    // debugging checks
    std::vector<double> errnorms(curRank_);
    if (debug_) {
      int info;
      // Check that A V = U Sigma
      // get pointers to current U and V, create workspace for A V - U Sigma
      Epetra_MultiVector work(U_->Map(),curRank_,false), 
                         curU(::View,*U_,0,curRank_),
                         curV(::View,*V_,0,curRank_);
      // create local MV for sigmas
      Epetra_LocalMap lclmap(curRank_,0,A_->Comm());
      Epetra_MultiVector curS(lclmap,curRank_,true);
      for (int i=0; i<curRank_; i++) {
        curS[i][i] = sigma_[i];
      }
      info = work.Multiply('N','N',1.0,curU,curS,0.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for debugging U S.");
      info = work.Multiply('N','N',-1.0,*A_,curV,1.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for debugging U S - A V.");
      work.Norm2(&errnorms[0]);
      for (int i=0; i<curRank_; i++) {
        if (sigma_[i] != 0.0) {
          errnorms[i] /= sigma_[i];
        }
      }
    }

    // update pass counter
    curNumPasses_++;

    // print out some info
    const Epetra_Comm *comm = &A_->Comm();
    if (comm->MyPID() == 0 && verbLevel_ >= 1) {
      std::cout 
        << "------------- ISVDMultiCD::makePass() -----------" << std::endl
        << "| Number of passes: " << curNumPasses_ << std::endl
        << "|     Current rank: " << curRank_ << std::endl
        << "|   Current sigmas: " << std::endl;
      for (int i=0; i<curRank_; i++) {
        std::cout << "|             " << sigma_[i] << std::endl;
      }
      if (debug_) {
        std::cout << "|DBG   US-AV norms: " << std::endl;
        for (int i=0; i<curRank_; i++) {
          std::cout << "|DBG          " << errnorms[i] << std::endl;
        }
        if (!firstPass) {
          std::cout << "|DBG      R-I norm: " << Rerr << std::endl;
        }
      }
    }

    return;
  }
// ======================================================================
void GetPtent(const Epetra_RowMatrix& A, Teuchos::ParameterList& List,
              double* thisns, Teuchos::RCP<Epetra_CrsMatrix>& Ptent,
              Teuchos::RCP<Epetra_MultiVector>& NextNS, const int domainoffset)
{
  const int nsdim = List.get<int>("null space: dimension",-1);
  if (nsdim<=0) ML_THROW("null space dimension not given",-1);
  const Epetra_Map& rowmap = A.RowMatrixRowMap();
  const int mylength = rowmap.NumMyElements();

  // wrap nullspace into something more handy
  Epetra_MultiVector ns(View,rowmap,thisns,mylength,nsdim);

  // vector to hold aggregation information
  Epetra_IntVector aggs(rowmap,false);

  // aggregation with global aggregate numbering
  int naggregates = GetGlobalAggregates(const_cast<Epetra_RowMatrix&>(A),List,thisns,aggs);
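  // (after this call, aggs[i] holds the global aggregate number of local row i,
  //  or a negative value if row i was not assigned to an aggregate)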

  // build a domain map for Ptent
  // find first aggregate on proc
  int firstagg = -1;
  int offset = -1;
  for (int i=0; i<mylength; ++i)
    if (aggs[i]>=0)
    {
      offset = firstagg = aggs[i];
      break;
    }
  offset *= nsdim;
  if (offset<0) ML_THROW("could not find any aggregate on proc",-2);
  std::vector<int> coarsegids(naggregates*nsdim);
  for (int i=0; i<naggregates; ++i)
    for (int j=0; j<nsdim; ++j)
    {
      coarsegids[i*nsdim+j] = offset + domainoffset;
      ++offset;
    }
  Epetra_Map pdomainmap(-1,naggregates*nsdim,&coarsegids[0],0,A.Comm());
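  // (each global aggregate agg therefore owns the nsdim consecutive coarse GIDs
  //  agg*nsdim + domainoffset, ..., agg*nsdim + (nsdim-1) + domainoffset;
  //  e.g. with nsdim = 3 and domainoffset = 0, aggregate 4 owns GIDs 12..14)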

  // loop aggregates and build ids for dofs
  std::map<int,std::vector<int> > aggdofs;
  std::map<int,std::vector<int> >::iterator fool;
  for (int i=0; i<naggregates; ++i)
  {
    std::vector<int> gids(0);
    aggdofs.insert(std::pair<int,std::vector<int> >(firstagg+i,gids));
  }
  for (int i=0; i<mylength; ++i)
  {
    if (aggs[i]<0) continue;
    std::vector<int>& gids = aggdofs[aggs[i]];
    gids.push_back(aggs.Map().GID(i));
  }

#if 0 // debugging output
  for (int proc=0; proc<A.Comm().NumProc(); ++proc)
  {
    if (proc==A.Comm().MyPID())
    {
      for (fool=aggdofs.begin(); fool!=aggdofs.end(); ++fool)
      {
        std::cout << "Proc " << proc << " Aggregate " << fool->first << " Dofs ";
        std::vector<int>& gids = fool->second;
        for (int i=0; i<(int)gids.size(); ++i) std::cout << gids[i] << " ";
        std::cout << std::endl;
      }
    }
    fflush(stdout);
    A.Comm().Barrier();
  }
#endif

  // coarse level nullspace to be filled
  NextNS = Teuchos::rcp(new Epetra_MultiVector(pdomainmap,nsdim,true));
  Epetra_MultiVector& nextns = *NextNS;

  // matrix Ptent
  Ptent = Teuchos::rcp(new Epetra_CrsMatrix(Copy,rowmap,nsdim));

  // loop aggregates and extract the appropriate slices of the nullspace.
  // do QR and assemble Q and R to Ptent and NextNS
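  // (for each aggregate, Bagg is the fine nullspace restricted to that
  //  aggregate's rows; from Bagg = Q R, the orthonormal Q becomes the
  //  aggregate's block column of Ptent and R its block of NextNS, so that
  //  Ptent * NextNS reproduces the fine-level nullspace on those rows)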
  for (fool=aggdofs.begin(); fool!=aggdofs.end(); ++fool)
  {
    // extract the aggregate-local chunk of the nullspace
    const int aggsize = (int)fool->second.size();
    Epetra_SerialDenseMatrix Bagg(aggsize,nsdim);
    for (int i=0; i<aggsize; ++i)
      for (int j=0; j<nsdim; ++j)
        Bagg(i,j) = (*ns(j))[ns.Map().LID(fool->second[i])];

    // Bagg = Q*R
    int m = Bagg.M();
    int n = Bagg.N();
    int lwork = n*10;
    int info = 0;
    int k = std::min(m,n);
    if (k!=n) ML_THROW("Aggregate too small, fatal!",-1);
    std::vector<double> work(lwork);
    std::vector<double> tau(k);
    Epetra_LAPACK lapack;
    lapack.GEQRF(m,n,Bagg.A(),m,&tau[0],&work[0],lwork,&info);
    if (info) ML_THROW("Lapack dgeqrf returned nonzero",info);
    // GEQRF returns the optimal workspace size in work[0];
    // grow the workspace here so the ORGQR call below can use it
    if (work[0]>lwork)
    {
      lwork = (int)work[0];
      work.resize(lwork);
    }

    // get R (stored on Bagg) and assemble it into nextns
    int agg_cgid = fool->first*nsdim;
    if (!nextns.Map().MyGID(agg_cgid+domainoffset))
      ML_THROW("Missing coarse column id on this proc",-1);
    for (int i=0; i<n; ++i)
      for (int j=i; j<n; ++j)
        (*nextns(j))[nextns.Map().LID(domainoffset+agg_cgid+i)] = Bagg(i,j);

    // get Q and assemble it into Ptent
    lapack.ORGQR(m,n,k,Bagg.A(),m,&tau[0],&work[0],lwork,&info);
    if (info) ML_THROW("Lapack dorgqr returned nonzero",info);
    for (int i=0; i<aggsize; ++i)
    {
      const int actgrow = fool->second[i];
      for (int j=0; j<nsdim; ++j)
      {
        int actgcol = fool->first*nsdim+j+domainoffset;
        int errone = Ptent->SumIntoGlobalValues(actgrow,1,&Bagg(i,j),&actgcol);
        if (errone>0)
        {
          int errtwo = Ptent->InsertGlobalValues(actgrow,1,&Bagg(i,j),&actgcol);
          if (errtwo<0) ML_THROW("Epetra_CrsMatrix::InsertGlobalValues returned negative nonzero",errtwo);
        }
        else if (errone) ML_THROW("Epetra_CrsMatrix::SumIntoGlobalValues returned negative nonzero",errone);
      }
    } // for (int i=0; i<aggsize; ++i)
  } // for (fool=aggdofs.begin(); fool!=aggdofs.end(); ++fool)
  int err = Ptent->FillComplete(pdomainmap,rowmap);
  if (err) ML_THROW("Epetra_CrsMatrix::FillComplete returned nonzero",err);
  err = Ptent->OptimizeStorage();
  if (err) ML_THROW("Epetra_CrsMatrix::OptimizeStorage returned nonzero",err);

  return;
}
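
// A minimal call sketch (illustrative only; caller-side names such as A,
// fine_ns and P are hypothetical, and GetGlobalAggregates may require further
// aggregation options in the parameter list):
//
//   Teuchos::ParameterList mllist;
//   mllist.set("null space: dimension", 3);        // nsdim, read above
//   double* fine_ns = ...;                         // mylength*nsdim values, stored column-wise
//   Teuchos::RCP<Epetra_CrsMatrix>   P;
//   Teuchos::RCP<Epetra_MultiVector> coarse_ns;
//   GetPtent(A, mllist, fine_ns, P, coarse_ns, 0);
//   // on return, P holds the tentative prolongator and coarse_ns the
//   // next-level nullspace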