Example #1
  void
  gatherStack (MatrixViewType& R_stack,
               ConstMatrixViewType& R_local,
               const Teuchos::RCP<MessengerBase<typename MatrixViewType::scalar_type> >& messenger)
  {
    typedef typename MatrixViewType::ordinal_type ordinal_type;
    typedef typename MatrixViewType::scalar_type scalar_type;
    typedef MatView<ordinal_type, scalar_type> mat_view_type;

    const int nprocs = messenger->size();
    const int my_rank = messenger->rank();

    if (my_rank == 0) {
      const ordinal_type ncols = R_stack.ncols();

      // Copy data from R_local into top ncols x ncols block of R_stack.
      mat_view_type R_stack_view_first (ncols, ncols, R_stack.get(), R_stack.lda());
      deep_copy (R_stack_view_first, R_local);

      // Loop through all other processors, fetching their matrix data.
      RMessenger< ordinal_type, scalar_type > receiver (messenger);
      for (int srcProc = 1; srcProc < nprocs; ++srcProc) {
        scalar_type* const R_ptr = R_stack.get() + srcProc*ncols;
        mat_view_type R_stack_view_cur (ncols, ncols, R_ptr, R_stack.lda());
        // Fill (the lower triangle) with zeros, since
        // RMessenger::recv() only writes to the upper triangle.
        R_stack_view_cur.fill (scalar_type (0));
        receiver.recv (R_stack_view_cur, srcProc);
      }
    }
    else {
      // We only read R_stack on Proc 0, not on this proc.
      // Send data from R_local to Proc 0.
      const int destProc = 0;
      RMessenger<ordinal_type, scalar_type> sender (messenger);
      sender.send (R_local, destProc);
    }
    messenger->barrier ();
  }
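The receive loop's pointer arithmetic assumes column-major storage: R_stack holds one ncols x ncols block per process, stacked vertically, so the block for process q starts at row q*ncols, i.e. at pointer offset q*ncols. A minimal, self-contained sketch of that indexing, with a plain std::vector standing in for the TSQR MatView type:

  #include <cassert>
  #include <vector>

  // Sketch only: in a column-major matrix with leading dimension lda,
  // element (i,j) lives at data[i + j*lda].  With nprocs stacked
  // ncols x ncols blocks, the block for process q starts at row
  // q*ncols -- the same offset the receive loop above computes.
  int main () {
    const int nprocs = 3, ncols = 2;
    const int lda = nprocs * ncols;            // rows of R_stack
    std::vector<double> R_stack (lda * ncols, 0.0);

    for (int q = 0; q < nprocs; ++q) {
      double* const block = R_stack.data () + q * ncols;
      for (int j = 0; j < ncols; ++j)
        for (int i = 0; i < ncols; ++i)
          block[i + j * lda] = q + 1;          // mark block q
    }
    // Global row 2, column 0 falls in process 1's block.
    assert (R_stack[2 + 0 * lda] == 2.0);
    return 0;
  }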
Example #2
 Matrix (const MatrixViewType& in) :
     nrows_ (in.nrows()),
     ncols_ (in.ncols()),
     A_ (verified_alloc_size (in.nrows(), in.ncols()))
 {
     if (A_.size() != 0)
         copy_matrix (nrows(), ncols(), get(), lda(), in.get(), in.lda());
 }
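The constructor copies the view's data into freshly allocated storage via copy_matrix. Its definition is not shown here; the following is a plausible shape inferred from the call above (destination arguments first, then source), offered as an assumption rather than the TSQR source:

  #include <algorithm>

  // Hypothetical body for copy_matrix, matching the call above: copy
  // an nrows x ncols column-major matrix from (in, in_lda) to
  // (out, out_lda), one column at a time.
  template<class Ordinal, class Scalar>
  void copy_matrix (const Ordinal nrows, const Ordinal ncols,
                    Scalar* const out, const Ordinal out_lda,
                    const Scalar* const in, const Ordinal in_lda)
  {
    for (Ordinal j = 0; j < ncols; ++j)
      std::copy (in + j * in_lda,
                 in + j * in_lda + nrows,
                 out + j * out_lda);
  }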
Example #3
    void
    unpack (MatrixViewType& R)
    {
      typedef typename MatrixViewType::ordinal_type view_ordinal_type;
      typedef typename std::vector< Scalar >::const_iterator const_iter_type;

      const view_ordinal_type ncols = R.ncols();
      const_iter_type iter = buffer_.begin();
      for (view_ordinal_type j = 0; j < ncols; ++j)
        {
          std::copy (iter, iter + (j+1), &R(0,j));
          iter += (j+1);
        }
    }
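unpack reads a packed upper triangle: column j of R receives j+1 entries, so the whole buffer holds ncols*(ncols+1)/2 scalars. A standalone round-trip sketch of the same layout; the packing loop here is an assumption that simply mirrors the unpacking loop above:

  #include <algorithm>
  #include <cassert>
  #include <cstddef>
  #include <vector>

  int main () {
    const int n = 3, lda = n;
    // Column-major 3x3 matrix with a known upper triangle.
    std::vector<double> R = {1, 0, 0,  2, 3, 0,  4, 5, 6};

    // Pack the upper triangle column by column: column j has j+1 entries.
    std::vector<double> buffer;
    for (int j = 0; j < n; ++j)
      for (int i = 0; i <= j; ++i)
        buffer.push_back (R[i + j * lda]);
    assert (buffer.size () == static_cast<std::size_t> (n * (n + 1) / 2));

    // Unpack into a zeroed matrix, mirroring unpack() above.
    std::vector<double> R2 (n * n, 0.0);
    std::vector<double>::const_iterator iter = buffer.begin ();
    for (int j = 0; j < n; ++j) {
      std::copy (iter, iter + (j + 1), &R2[0 + j * lda]);
      iter += j + 1;
    }
    assert (R == R2);
    return 0;
  }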
Example #4
    void
    fill_with_zeros (MatrixViewType A,
                     const bool contiguous_cache_blocks) const
    {
      // Note: if the cache blocks are stored contiguously, A.lda()
      // won't be the correct leading dimension of A, but it won't
      // matter: we only ever operate on A_cur here, and A_cur's
      // leading dimension is set correctly by split_top_block().
      while (! A.empty())
        {
          // This call modifies the matrix view A, but that's OK since
          // we passed the input view by copy, not by reference.
          MatrixViewType A_cur = split_top_block (A, contiguous_cache_blocks);
          A_cur.fill (Scalar(0));
        }
    }
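split_top_block repeatedly slices the leading cache block off the view until nothing remains; because A was passed by value, the caller's view is untouched. A toy sketch of that shrink-the-view idiom (Range and split_top are hypothetical stand-ins, not the TSQR API):

  #include <cassert>

  // Hypothetical stand-in for a matrix view: a half-open row range.
  struct Range {
    int begin, end;
    bool empty () const { return begin >= end; }
  };

  // Split off the top block (at most block_size rows) and shrink the
  // remainder, mimicking split_top_block() above.
  static Range split_top (Range& rest, const int block_size) {
    const int top_end = (rest.begin + block_size < rest.end)
      ? rest.begin + block_size : rest.end;
    const Range top = {rest.begin, top_end};
    rest.begin = top_end;  // The local view shrinks; the caller's copy is untouched.
    return top;
  }

  int main () {
    Range A = {0, 10};
    int blocks = 0;
    while (! A.empty ()) {
      Range cur = split_top (A, 4);  // Yields 4, 4, then 2 rows.
      (void) cur;                    // A real loop would fill cur with zeros.
      ++blocks;
    }
    assert (blocks == 3);
    return 0;
  }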
Example #5
      void
      implicit_Q (MatrixViewType& Q,
                  typename MatrixViewType::scalar_type tau[])
      {
        implicit_Q (Q.nrows(), Q.ncols(), Q.get(), Q.lda(), tau);
      }
Example #6
 static void
 printMatrix (std::ostream& out,
              const MatrixViewType& A)
 {
   print_local_matrix (out, A.nrows(), A.ncols(), A.get(), A.lda());
 }
    void
    randomGlobalMatrix (Generator* const pGenerator,
                        MatrixViewType& A_local,
                        const typename Teuchos::ScalarTraits< typename MatrixViewType::scalar_type >::magnitudeType singular_values[],
                        MessengerBase< typename MatrixViewType::ordinal_type >* const ordinalMessenger,
                        MessengerBase< typename MatrixViewType::scalar_type >* const scalarMessenger)
    {
      using Teuchos::NO_TRANS;
      using std::vector;
      typedef typename MatrixViewType::ordinal_type ordinal_type;
      typedef typename MatrixViewType::scalar_type scalar_type;

      const bool b_local_debug = false;

      const int rootProc = 0;
      const int nprocs = ordinalMessenger->size();
      const int myRank = ordinalMessenger->rank();
      Teuchos::BLAS<ordinal_type, scalar_type> blas;

      const ordinal_type nrowsLocal = A_local.nrows();
      const ordinal_type ncols = A_local.ncols();

      // Theory: Suppose there are P processors.  Proc q wants an m_q by n
      // component of the matrix A, which we write as A_q.  On Proc 0, we
      // generate random m_q by n orthogonal matrices Q_q (in explicit
      // form), and send Q_q to Proc q.  The m by n matrix [Q_0; Q_1; ...;
      // Q_{P-1}] is not itself orthogonal, but the m by n matrix
      // Q = [Q_0 / sqrt(P); Q_1 / sqrt(P); ...; Q_{P-1} / sqrt(P)] is:
      //
      // \sum_{q = 0}^{P-1} (Q_q^T * Q_q) / P = I.
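      //
      // To verify: each Q_q has orthonormal columns, so Q_q^T * Q_q = I,
      // and therefore
      //
      //   Q^T * Q = \sum_{q=0}^{P-1} (Q_q / sqrt(P))^T * (Q_q / sqrt(P))
      //           = (1/P) \sum_{q=0}^{P-1} Q_q^T * Q_q = (1/P) (P * I) = I.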

      if (myRank == rootProc)
        {
          typedef Random::MatrixGenerator< ordinal_type, scalar_type, Generator > matgen_type;
          matgen_type matGen (*pGenerator);

          // Generate a random ncols by ncols upper triangular matrix
          // R with the given singular values.
          Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type(0));
          matGen.fill_random_R (ncols, R.get(), R.lda(), singular_values);

          // Broadcast R to all the processors.
          scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);

          // Generate (for myself) a random nrowsLocal x ncols
          // orthogonal matrix, stored in explicit form.
          Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols);
          matGen.explicit_Q (nrowsLocal, ncols, Q_local.get(), Q_local.lda());

          // Scale the (local) orthogonal matrix by the number of
          // processors P, to make the columns of the global matrix Q
          // orthogonal.  (Otherwise the norm of each column will be
          // sqrt(P) instead of 1.)
          const scalar_type P = static_cast< scalar_type > (nprocs);
          // Overflow check: if casting P back to int doesn't produce
          // the same value as nprocs, the original cast from int to
          // scalar_type overflowed.  We take the real part, because
          // scalar_type might be complex.
          if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
            throw std::runtime_error ("Casting nprocs to Scalar failed");

          scaleMatrix (Q_local, P);

          // A_local := Q_local * R
          blas.GEMM (NO_TRANS, NO_TRANS, nrowsLocal, ncols, ncols,
                     scalar_type(1), Q_local.get(), Q_local.lda(),
                     R.get(), R.lda(),
                     scalar_type(0), A_local.get(), A_local.lda());

          for (int recvProc = 1; recvProc < nprocs; ++recvProc)
            {
              // Ask the receiving processor how big (i.e., how many rows)
              // its local component of the matrix is.
              ordinal_type nrowsRemote = 0;
              ordinalMessenger->recv (&nrowsRemote, 1, recvProc, 0);

              if (b_local_debug)
                {
                  std::ostringstream os;
                  os << "For Proc " << recvProc << ": local block is "
                     << nrowsRemote << " by " << ncols << std::endl;
                  std::cerr << os.str();
                }

              // Make sure Q_local is big enough to hold the data for
              // the current receiver proc.
              Q_local.reshape (nrowsRemote, ncols);

              // Compute a random nrowsRemote * ncols orthogonal
              // matrix Q_local, for the current receiving processor.
              matGen.explicit_Q (nrowsRemote, ncols, Q_local.get(), Q_local.lda());

              // Send Q_local to the current receiving processor.
              scalarMessenger->send (Q_local.get(), nrowsRemote*ncols, recvProc, 0);
            }
        }
      else
        {
          // Get the R factor via the broadcast from Proc 0.  There is
          // only one R factor, shared by all the processes.
          Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type (0));
          scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);

          // Q_local (nrows_local by ncols, random orthogonal matrix)
          // will be received from Proc 0, where it was generated.
          const ordinal_type recvSize = nrowsLocal * ncols;
          Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols);

          // Tell Proc 0 how many rows there are in the random orthogonal
          // matrix I want to receive from Proc 0.
          ordinalMessenger->send (&nrowsLocal, 1, rootProc, 0);

          // Receive the orthogonal matrix from Proc 0.
          scalarMessenger->recv (Q_local.get(), recvSize, rootProc, 0);

          // Scale the (local) orthogonal matrix by the number of
          // processors, to make the global matrix Q orthogonal.
          const scalar_type P = static_cast< scalar_type > (nprocs);
          // Overflow check: if casting P back to int doesn't produce
          // the same value as nprocs, the original cast from int to
          // scalar_type overflowed.  We take the real part, because
          // scalar_type might be complex.
          if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
            throw std::runtime_error ("Casting nprocs to Scalar failed");
          scaleMatrix (Q_local, P);

          // A_local := Q_local * R
          blas.GEMM (NO_TRANS, NO_TRANS, nrowsLocal, ncols, ncols,
                     scalar_type(1), Q_local.get(), Q_local.lda(),
                     R.get(), R.lda(),
                     scalar_type(0), A_local.get(), A_local.lda());
        }
    }
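The sqrt(P) scaling in the theory comment is easy to check numerically. A self-contained sketch, independent of the TSQR classes, stacking P identity blocks (the simplest matrices with orthonormal columns):

  #include <cassert>
  #include <cmath>
  #include <vector>

  // Stack P blocks with orthonormal columns, divide by sqrt(P), and
  // check that the result has orthonormal columns (Q^T * Q = I).
  int main () {
    const int P = 4, n = 2;
    const int m = P * n;                  // rows of the stacked matrix
    std::vector<double> Q (m * n, 0.0);   // column-major, lda = m
    for (int q = 0; q < P; ++q)
      for (int j = 0; j < n; ++j)
        Q[(q * n + j) + j * m] = 1.0 / std::sqrt (double (P));

    // Q^T * Q should be the n x n identity, to rounding error.
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k) {
        double dot = 0.0;
        for (int i = 0; i < m; ++i)
          dot += Q[i + j * m] * Q[i + k * m];
        assert (std::abs (dot - (j == k ? 1.0 : 0.0)) < 1e-14);
      }
    return 0;
  }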