Example #1
0
    // Return a view of cache block number cache_block_index of the
    // matrix view A, or an empty view if the index is out of range or
    // the block has no rows.
    //
    // A [in]: view of the whole matrix.
    // cache_block_index [in]: which cache block to extract.
    // contiguous_cache_blocks [in]: whether cache blocks are stored
    //   contiguously in memory.
    MatrixViewType 
    get_cache_block (MatrixViewType A,
		     const typename MatrixViewType::ordinal_type cache_block_index,
		     const bool contiguous_cache_blocks) const
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;

      // Total number of cache blocks that partition A's rows.
      const ordinal_type num_cache_blocks = 
	strategy_.num_cache_blocks (A.nrows(), A.ncols(), nrows_cache_block());

      // Out-of-range block index: return an empty view, not an error.
      if (cache_block_index >= num_cache_blocks)
	return MatrixViewType (0, 0, NULL, 0); // empty

      // result[0] = starting row index of the cache block
      // result[1] = number of rows in the cache block
      // result[2] = pointer offset (A.get() + result[2])
      // result[3] = leading dimension (a.k.a. stride) of the cache block
      //
      // Use the block-local ordinal_type typedef here, for consistency
      // with the rest of this method.
      std::vector<ordinal_type> result = 
	strategy_.cache_block_details (cache_block_index, A.nrows(), A.ncols(),
				       A.lda(), nrows_cache_block(), 
				       contiguous_cache_blocks);
      if (result[1] == 0)
	// For some reason, the cache block is empty.
	return MatrixViewType (0, 0, NULL, 0);

      // Cast the (possibly signed) ordinal offset to size_t before the
      // pointer arithmetic, to avoid signed/unsigned compiler warnings.
      return MatrixViewType (result[1], A.ncols(), 
			     A.get() + static_cast<size_t>(result[2]), 
			     result[3]);
    }
 Matrix (const MatrixViewType& in) :
     nrows_ (in.nrows()),
     ncols_ (in.ncols()),
     A_ (verified_alloc_size (in.nrows(), in.ncols()))
 {
     if (A_.size() != 0)
         copy_matrix (nrows(), ncols(), get(), lda(), in.get(), in.lda());
 }
    // Divide every entry of A in place by denom.
    //
    // A [in/out]: matrix view whose entries are scaled.
    // denom [in]: the divisor.
    static void
    scaleMatrix (MatrixViewType& A,
                 const typename MatrixViewType::scalar_type& denom)
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;
      typedef typename MatrixViewType::scalar_type scalar_type;

      const ordinal_type numRows = A.nrows();
      const ordinal_type numCols = A.ncols();
      const ordinal_type stride = A.lda();

      if (numRows == stride) {
        // A is stored contiguously: walk it as one flat array.
        scalar_type* const data = A.get ();
        const ordinal_type numEntries = numRows * numCols;
        for (ordinal_type k = 0; k < numEntries; ++k) {
          data[k] /= denom;
        }
      }
      else {
        // Only each column is contiguous: scale column by column.
        for (ordinal_type col = 0; col < numCols; ++col) {
          scalar_type* const colData = &A(0, col);
          for (ordinal_type row = 0; row < numRows; ++row) {
            colData[row] /= denom;
          }
        }
      }
    }
 // Two views are equal exactly when they alias the same storage with
 // identical dimensions and stride.
 bool operator== (const MatrixViewType& B) const
 {
     return get() == B.get()
         && nrows() == B.nrows()
         && ncols() == B.ncols()
         && lda() == B.lda();
 }
Example #5
0
 static
 std::vector<typename Teuchos::ScalarTraits<typename MatrixViewType::scalar_type>::magnitudeType>
 localVerify (const MatrixViewType& A,
              const MatrixViewType& Q,
              const MatrixViewType& R)
 {
   return local_verify (A.nrows(), A.ncols(), A.get(), A.lda(),
                        Q.get(), Q.lda(), R.get(), R.lda());
 }
Example #6
0
    // Split off and return the topmost cache block of A; A itself is
    // modified to view only the remaining rows.
    MatrixViewType
    split_top_block (MatrixViewType& A, const bool contiguous_cache_blocks) const
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;

      // Ask the cache-blocking strategy how many rows belong to the
      // topmost block.
      const ordinal_type num_top_rows =
        strategy_.top_block_split_nrows (A.nrows(), ncols(),
                                         nrows_cache_block());
      // split_top() sets A to A_rest, and returns A_top.
      return A.split_top (num_top_rows, contiguous_cache_blocks);
    }
    // Split off and return the bottommost cache block of A; A itself
    // is modified to view only the remaining rows.
    MatrixViewType
    split_bottom_block (MatrixViewType& A, const bool contiguous_cache_blocks) const
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;

      // Row count of the bottom block, per the blocking strategy.
      const ordinal_type num_bottom_rows =
        strategy_.bottom_block_split_nrows (A.nrows(), ncols(),
                                            nrows_cache_block());
      // split_bottom() modifies A to view only the rows above the split.
      return A.split_bottom (num_bottom_rows, contiguous_cache_blocks);
    }
    // Return a view of the topmost cache block of A, without
    // modifying A (a copy of the view is split instead).
    MatrixViewType
    top_block (const MatrixViewType& A, const bool contiguous_cache_blocks) const
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;

      const ordinal_type num_top_rows =
        strategy_.top_block_split_nrows (A.nrows(), ncols(),
                                         nrows_cache_block());
      // Split a copy, so the caller's view A stays untouched.
      MatrixViewType scratch (A);
      return scratch.split_top (num_top_rows, contiguous_cache_blocks);
    }
Example #9
0
    // Split off and return the bottommost cache block of A; A itself
    // is modified to view only the remaining rows.
    MatrixViewType
    split_bottom_block (MatrixViewType& A, const bool contiguous_cache_blocks) const
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;

      // Deliberately ignore A's column count here: all matrices must
      // be blocked with the same cache blocking strategy, so the
      // strategy's own ncols() is used instead.
      const ordinal_type num_bottom_rows =
        strategy_.bottom_block_split_nrows (A.nrows(), ncols(),
                                            nrows_cache_block());
      // split_bottom() sets A to A_rest, and returns A_bot.
      return A.split_bottom (num_bottom_rows, contiguous_cache_blocks);
    }
Example #10
0
    // Return a view of the topmost cache block of A, without
    // modifying A (a copy of the view is split instead).
    MatrixViewType
    top_block (const MatrixViewType& A, const bool contiguous_cache_blocks) const
    {
      typedef typename MatrixViewType::ordinal_type ordinal_type;

      // Deliberately ignore A's column count here: all matrices must
      // be blocked with the same cache blocking strategy, so the
      // strategy's own ncols() is used instead.
      const ordinal_type num_top_rows =
        strategy_.top_block_split_nrows (A.nrows(), ncols(),
                                         nrows_cache_block());
      // Split a copy, so the caller's view A stays untouched.
      MatrixViewType scratch (A);
      return scratch.split_top (num_top_rows, contiguous_cache_blocks);
    }
      // Convenience overload: unpack the view's dimensions and raw
      // storage, then delegate to the raw-pointer overload.
      void
      implicit_Q (MatrixViewType& Q, 
		  typename MatrixViewType::scalar_type tau[])
      {
	const typename MatrixViewType::ordinal_type numRows = Q.nrows ();
	const typename MatrixViewType::ordinal_type numCols = Q.ncols ();
	implicit_Q (numRows, numCols, Q.get(), Q.lda(), tau);
      }
Example #12
0
 static void
 printMatrix (std::ostream& out,
              const MatrixViewType& A)
 {
   print_local_matrix (out, A.nrows(), A.ncols(), A.get(), A.lda());
 }
    // Fill A_local (this process's block of rows) so that the
    // participating processes collectively hold a random global matrix
    // A = Q*R, where R is an ncols x ncols upper triangular matrix
    // with the given singular values, and the stacked local Q blocks
    // (each scaled by 1/P for P processes) form a globally orthogonal
    // matrix.  Collective: every process must call this.
    //
    // pGenerator [in/out]: pseudorandom number generator; the code
    //   below only draws from it on the root process.
    // A_local [out]: this process's nrowsLocal x ncols block.
    // singular_values [in]: desired singular values of the global
    //   matrix (presumably at least ncols entries -- TODO confirm
    //   against fill_random_R's contract).
    // ordinalMessenger [in/out]: communicator for ordinal_type data.
    // scalarMessenger [in/out]: communicator for scalar_type data.
    //
    // NOTE(review): correctness depends on the root and non-root
    // branches issuing their broadcast / send / recv calls in matching
    // order; confirm the MessengerBase implementation preserves that
    // ordering before reordering anything here.
    void
    randomGlobalMatrix (Generator* const pGenerator,
                        MatrixViewType& A_local,
                        const typename Teuchos::ScalarTraits< typename MatrixViewType::scalar_type >::magnitudeType singular_values[],
                        MessengerBase< typename MatrixViewType::ordinal_type >* const ordinalMessenger,
                        MessengerBase< typename MatrixViewType::scalar_type >* const scalarMessenger)
    {
      using Teuchos::NO_TRANS;
      using std::vector;
      typedef typename MatrixViewType::ordinal_type ordinal_type;
      typedef typename MatrixViewType::scalar_type scalar_type;


      // Flip to true for verbose per-process debugging output.
      const bool b_local_debug = false;

      const int rootProc = 0;
      const int nprocs = ordinalMessenger->size();
      const int myRank = ordinalMessenger->rank();
      Teuchos::BLAS<ordinal_type, scalar_type> blas;

      const ordinal_type nrowsLocal = A_local.nrows();
      const ordinal_type ncols = A_local.ncols();

      // Theory: Suppose there are P processors.  Proc q wants an m_q by n
      // component of the matrix A, which we write as A_q.  On Proc 0, we
      // generate random m_q by n orthogonal matrices Q_q (in explicit
      // form), and send Q_q to Proc q.  The m by n matrix [Q_0; Q_1; ...;
      // Q_{P-1}] is not itself orthogonal.  However, the m by n matrix
      // Q = [Q_0 / P; Q_1 / P; ...; Q_{P-1} / P] is orthogonal:
      //
      // \sum_{q = 0}^{P-1} (Q_q^T * Q_q) / P = I.

      if (myRank == rootProc)
        {
          typedef Random::MatrixGenerator< ordinal_type, scalar_type, Generator > matgen_type;
          matgen_type matGen (*pGenerator);

          // Generate a random ncols by ncols upper triangular matrix
          // R with the given singular values.
          Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type(0));
          matGen.fill_random_R (ncols, R.get(), R.lda(), singular_values);

          // Broadcast R to all the processors.
          scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);

          // Generate (for myself) a random nrowsLocal x ncols
          // orthogonal matrix, stored in explicit form.
          Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols);
          matGen.explicit_Q (nrowsLocal, ncols, Q_local.get(), Q_local.lda());

          // Scale the (local) orthogonal matrix by the number of
          // processors P, to make the columns of the global matrix Q
          // orthogonal.  (Otherwise the norm of each column will be P
          // instead of 1.)
          const scalar_type P = static_cast< scalar_type > (nprocs);
          // Overflow check: if casting P back to int doesn't recover
          // nprocs, the original int -> scalar_type conversion
          // overflowed or lost precision.  We take the real part
          // because scalar_type might be complex.
          if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
            throw std::runtime_error ("Casting nprocs to Scalar failed");

          scaleMatrix (Q_local, P);

          // A_local := Q_local * R
          blas.GEMM (NO_TRANS, NO_TRANS, nrowsLocal, ncols, ncols,
                     scalar_type(1), Q_local.get(), Q_local.lda(),
                     R.get(), R.lda(),
                     scalar_type(0), A_local.get(), A_local.lda());

          // Generate and ship each remote process's Q block in rank
          // order (matching the send/recv order in the else branch).
          for (int recvProc = 1; recvProc < nprocs; ++recvProc)
            {
              // Ask the receiving processor how big (i.e., how many rows)
              // its local component of the matrix is.
              ordinal_type nrowsRemote = 0;
              ordinalMessenger->recv (&nrowsRemote, 1, recvProc, 0);

              if (b_local_debug)
                {
                  std::ostringstream os;
                  os << "For Proc " << recvProc << ": local block is "
                     << nrowsRemote << " by " << ncols << std::endl;
                  std::cerr << os.str();
                }

              // Make sure Q_local is big enough to hold the data for
              // the current receiver proc.
              Q_local.reshape (nrowsRemote, ncols);

              // Compute a random nrowsRemote * ncols orthogonal
              // matrix Q_local, for the current receiving processor.
              matGen.explicit_Q (nrowsRemote, ncols, Q_local.get(), Q_local.lda());

              // Send Q_local to the current receiving processor.
              scalarMessenger->send (Q_local.get(), nrowsRemote*ncols, recvProc, 0);
            }
        }
      else
        {
          // Receive the R factor from Proc 0.  There's only 1 R
          // factor for all the processes.
          Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type (0));
          scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);

          // Q_local (nrows_local by ncols, random orthogonal matrix)
          // will be received from Proc 0, where it was generated.
          const ordinal_type recvSize = nrowsLocal * ncols;
          Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols);

          // Tell Proc 0 how many rows there are in the random orthogonal
          // matrix I want to receive from Proc 0.
          ordinalMessenger->send (&nrowsLocal, 1, rootProc, 0);

          // Receive the orthogonal matrix from Proc 0.
          scalarMessenger->recv (Q_local.get(), recvSize, rootProc, 0);

          // Scale the (local) orthogonal matrix by the number of
          // processors, to make the global matrix Q orthogonal.
          const scalar_type P = static_cast< scalar_type > (nprocs);
          // Overflow check: if casting P back to int doesn't recover
          // nprocs, the original int -> scalar_type conversion
          // overflowed or lost precision.  We take the real part
          // because scalar_type might be complex.
          if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
            throw std::runtime_error ("Casting nprocs to Scalar failed");
          scaleMatrix (Q_local, P);

          // A_local := Q_local * R
          blas.GEMM (NO_TRANS, NO_TRANS, nrowsLocal, ncols, ncols,
                     scalar_type(1), Q_local.get(), Q_local.lda(),
                     R.get(), R.lda(),
                     scalar_type(0), A_local.get(), A_local.lda());
        }
    }