Exemple #1
0
void matrix::svd(matrix& U, diagMatrix& S, matrix& Vdag) const
{	static StopWatch watch("matrix::svd");
	watch.start();
	//Initialize input and outputs:
	matrix A = *this; //destructible copy
	int M = A.nRows();
	int N = A.nCols();
	U.init(M,M);
	Vdag.init(N,N);
	S.resize(std::min(M,N));
	//Initialize temporaries:
	char jobz = 'A'; //full SVD (return complete unitary matrices)
	int lwork = 2*(M*N + M + N);
	std::vector<complex> work(lwork);
	std::vector<double> rwork(S.nRows() * std::max(5*S.nRows()+7, 2*(M+N)+1));
	std::vector<int> iwork(8*S.nRows());
	//Call LAPACK and check errors:
	int info=0;
	zgesdd_(&jobz, &M, &N, A.data(), &M, S.data(), U.data(), &M, Vdag.data(), &N,
		work.data(), &lwork, rwork.data(), iwork.data(), &info);
	if(info>0) //convergence failure; try the slower stabler version
	{	int info=0;
		matrix A = *this; //destructible copy
		zgesvd_(&jobz, &jobz, &M, &N, A.data(), &M, S.data(), U.data(), &M, Vdag.data(), &N,
			work.data(), &lwork, rwork.data(), &info);
		if(info<0) { logPrintf("Argument# %d to LAPACK SVD routine ZGESVD is invalid.\n", -info); stackTraceExit(1); }
		if(info>0) { logPrintf("Error code %d in LAPACK SVD routine ZGESVD.\n", info); stackTraceExit(1); }
	}
	if(info<0) { logPrintf("Argument# %d to LAPACK SVD routine ZGESDD is invalid.\n", -info); stackTraceExit(1); }
	watch.stop();
}
Exemple #2
0
bool schur(const cmat &A, cmat &U, cmat &T)
{
  it_assert_debug(A.rows() == A.cols(), "schur(): Matrix is not square");

  char jobvs = 'V';
  char sort = 'N';
  int info;
  int n = A.rows();
  int lda = n;
  int ldvs = n;
  int lwork = 2 * n; // This may be choosen better!
  int sdim = 0;
  vec rwork(n);
  cvec w(n);
  cvec work(lwork);

  T.set_size(lda, n, false);
  U.set_size(ldvs, n, false);

  T = A; // The routine overwrites input matrix with eigenvectors

  zgees_(&jobvs, &sort, 0, &n, T._data(), &lda, &sdim, w._data(), U._data(),
         &ldvs, work._data(), &lwork, rwork._data(), 0, &info);

  return (info == 0);
}
Exemple #3
0
    void operator() (char const jobz, char const uplo, int const n,
                     int const kd, T* ab, int const ldab, R* w, T* z,
                     int const ldz, optimal_workspace , int& info ) const {
        traits::detail::array<T> work( n );
        traits::detail::array<R> rwork( 3*n-2 );

        hbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz,
              traits::vector_storage( work ),
              traits::vector_storage( rwork ),
              info );
    }
Exemple #4
0
          inline
          int operator() (char jobvs, MatrA& a, EigVal& w, SchVec& vs, minimal_workspace ) const {
             typedef typename MatrA::value_type                            value_type ;
             typedef typename traits::type_traits< value_type >::real_type real_type ;

             int n = traits::matrix_size1( a );

             traits::detail::array<value_type> work( 2*n );
             traits::detail::array<real_type>  rwork( n );

             return gees( jobvs, a, w, vs, work, rwork );
          } // gees()
Exemple #5
0
Fichier : qr.cpp Projet : nvmd/itpp
bool qr(const cmat &A, cmat &Q, cmat &R, bmat &P)
{
  int info;
  int m = A.rows();
  int n = A.cols();
  int lwork = n;
  int k = std::min(m, n);
  cvec tau(k);
  cvec work(lwork);
  vec rwork(std::max(1, 2*n));
  ivec jpvt(n);
  jpvt.zeros();

  R = A;

  // perform workspace query for optimum lwork value
  int lwork_tmp = -1;
  zgeqp3_(&m, &n, R._data(), &m, jpvt._data(), tau._data(), work._data(),
          &lwork_tmp, rwork._data(), &info);
  if (info == 0) {
    lwork = static_cast<int>(real(work(0)));
    work.set_size(lwork, false);
  }
  zgeqp3_(&m, &n, R._data(), &m, jpvt._data(), tau._data(), work._data(),
          &lwork, rwork._data(), &info);

  Q = R;
  Q.set_size(m, m, true);

  // construct permutation matrix
  P = zeros_b(n, n);
  for (int j = 0; j < n; j++)
    P(jpvt(j) - 1, j) = 1;

  // construct R
  for (int i = 0; i < m; i++)
    for (int j = 0; j < std::min(i, n); j++)
      R(i, j) = 0;

  // perform workspace query for optimum lwork value
  lwork_tmp = -1;
  zungqr_(&m, &m, &k, Q._data(), &m, tau._data(), work._data(), &lwork_tmp,
          &info);
  if (info == 0) {
    lwork = static_cast<int>(real(work(0)));
    work.set_size(lwork, false);
  }
  zungqr_(&m, &m, &k, Q._data(), &m, tau._data(), work._data(), &lwork,
          &info);

  return (info == 0);
}
Exemple #6
0
// complex double
void PivotedQRWrapper(int m, int n, std::complex<double> *A, int lda,
                      std::vector<int>& jpvt,
                      std::vector< std::complex<double> >& tau) {
#ifndef RELEASE
    CallStackEntry entry("PivotedQRWrapper");
#endif
    int lwork = (n + 1) * BLOCKSIZE;
    std::vector< std::complex<double> > work(lwork);
    std::vector<double> rwork(lapack::PivotedQRRealWorkSize(n));
    lapack::PivotedQR(m, n, A, lda, &jpvt[0], &tau[0], &work[0], lwork, &rwork[0]);
    for( size_t i=0; i<jpvt.size(); ++i )
        jpvt[i]--;
}
Exemple #7
0
    BOOST_FORCEINLINE result_type operator()(A0 const& a0, A1 const& a1, A2 const& a2) const
    {
        result_type rcond;
        nt2_la_int n = nt2::height(a0);
        nt2_la_int ld = n;
        nt2_la_int info;

        nt2::memory::container<tag::table_, v_t, nt2::_2D> work(nt2::of_size(2*n,1));
        nt2::memory::container<tag::table_, result_type, nt2::_2D>  rwork(nt2::of_size(2*n,1));

        NT2_F77NAME(zgecon) ( &a1, &n, a0.raw(), &ld, &a2, &rcond , work.raw()
                              , rwork.raw(), &info
                            );

        return rcond;
    }
Exemple #8
0
void MultiplyAdjoint
( int maxRank, std::complex<Real> alpha,
  const Dense<std::complex<Real> >& A,
  const Dense<std::complex<Real> >& B,
        LowRank<std::complex<Real> >& C )
{
#ifndef RELEASE
    CallStackEntry entry("hmat_tools::MultiplyAdjoint (F := D D^H)");
#endif
    typedef std::complex<Real> Scalar;

    const int m = A.Height();
    const int n = B.Height();
    const int minDim = std::min( m, n );
    const int r = std::min( minDim, maxRank );

    // C.U := alpha A B^H
    MultiplyAdjoint( alpha, A, B, C.U );

    // Get the economic SVD of C.U, C.U = U Sigma V^H, overwriting C.U with U.
    Vector<Real> s( minDim );
    Dense<Scalar> VH( minDim, n );
    const int lwork = lapack::SVDWorkSize( m, n );
    std::vector<Scalar> work( lwork );
    std::vector<Real> rwork( 5*minDim );
    lapack::SVD
    ( 'O', 'S', m, n, C.U.Buffer(), C.U.LDim(),
      s.Buffer(), 0, 1, VH.Buffer(), VH.LDim(),
      &work[0], lwork, &rwork[0] );

    // Truncate the SVD in-place
    C.U.Resize( m, r );
    s.Resize( r );
    VH.Resize( r, n );

    C.V.SetType( GENERAL ); C.V.Resize( n, r );
    // Put (Sigma V^H)^T = (V^H)^T Sigma into C.V
    const int VHLDim = VH.LDim();
    for( int j=0; j<r; ++j )
    {
        const Real sigma = s.Get(j);
        Scalar* RESTRICT VCol = C.V.Buffer(0,j);
        const Scalar* RESTRICT VHRow = VH.LockedBuffer(j,0);
        for( int i=0; i<n; ++i )
            VCol[i] = sigma*VHRow[i*VHLDim];
    }
}
Exemple #9
0
void DivideAndConquerSVD
( int m, int n, dcomplex* A, int lda, 
  double* s, dcomplex* U, int ldu, dcomplex* VAdj, int ldva )
{
#ifndef RELEASE
    PushCallStack("lapack::DivideAndConquerSVD");
#endif
    if( m==0 || n==0 )
    {
#ifndef RELEASE
        PopCallStack();
#endif
        return;
    }

    const char jobz='S';
    int lwork=-1, info;
    dcomplex dummyWork;
    const int k = std::min(m,n);
    const int K = std::max(m,n);
    const int lrwork = k*std::max(5*k+7,2*K+2*k+1);
    std::vector<double> rwork(lrwork);
    std::vector<int> iwork(8*k);

    LAPACK(zgesdd)
    ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &dummyWork, &lwork,
      &rwork[0], &iwork[0], &info );

    lwork = dummyWork.real;
    std::vector<dcomplex> work(lwork);
    LAPACK(zgesdd)
    ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &work[0], &lwork,
      &rwork[0], &iwork[0], &info );
    if( info < 0 )
    {
        std::ostringstream msg;
        msg << "Argument " << -info << " had illegal value";
        throw std::logic_error( msg.str().c_str() );
    }
    else if( info > 0 )
    {
        throw std::runtime_error("zgesdd's updating process failed");
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #10
0
void matrix::diagonalize(matrix& evecs, diagMatrix& eigs) const
{	static StopWatch watch("matrix::diagonalize");
	watch.start();
	
	myassert(nCols()==nRows());
	int N = nRows();
	myassert(N > 0);
	
	//Check hermiticity
	const complex* thisData = data();
	double errNum=0.0, errDen=0.0;
	for(int i=0; i<N; i++)
		for(int j=0; j<N; j++)
		{	errNum += norm(thisData[index(i,j)]-thisData[index(j,i)].conj());
			errDen += norm(thisData[index(i,j)]);
		}
	double hermErr = sqrt(errNum / (errDen*N));
	if(hermErr > 1e-10)
	{	logPrintf("Relative hermiticity error of %le (>1e-10) encountered in diagonalize\n", hermErr);
		stackTraceExit(1);
	}
	
	char jobz = 'V'; //compute eigenvectors and eigenvalues
	char range = 'A'; //compute all eigenvalues
	char uplo = 'U'; //use upper-triangular part
	matrix A = *this; //copy input matrix (zheevr destroys input matrix)
	double eigMin = 0., eigMax = 0.; //eigenvalue range (not used for range-type 'A')
	int indexMin = 0, indexMax = 0; //eignevalue index range (not used for range-type 'A')
	double absTol = 0.; int nEigsFound;
	eigs.resize(N);
	evecs.init(N, N);
	std::vector<int> iSuppz(2*N);
	int lwork = (64+1)*N; std::vector<complex> work(lwork); //Magic number 64 obtained by running ILAENV as suggested in doc of zheevr (and taking the max over all N)
	int lrwork = 24*N; std::vector<double> rwork(lrwork); //from doc of zheevr
	int liwork = 10*N; std::vector<int> iwork(liwork); //from doc of zheevr
	int info=0;
	zheevr_(&jobz, &range, &uplo, &N, A.data(), &N,
		&eigMin, &eigMax, &indexMin, &indexMax, &absTol, &nEigsFound,
		eigs.data(), evecs.data(), &N, iSuppz.data(), work.data(), &lwork,
		rwork.data(), &lrwork, iwork.data(), &liwork, &info);
	if(info<0) { logPrintf("Argument# %d to LAPACK eigenvalue routine ZHEEVR is invalid.\n", -info); stackTraceExit(1); }
	if(info>0) { logPrintf("Error code %d in LAPACK eigenvalue routine ZHEEVR.\n", info); stackTraceExit(1); }
	watch.stop();
}
Exemple #11
0
void QRSVD
( int m, int n, scomplex* A, int lda, 
  float* s, scomplex* U, int ldu, scomplex* VAdj, int ldva )
{
#ifndef RELEASE
    PushCallStack("lapack::QRSVD");
#endif
    if( m==0 || n==0 )
    {
#ifndef RELEASE
        PopCallStack();
#endif
        return;
    }

    const char jobu='S', jobva='S';
    int lwork=-1, info;
    const int k = std::min(m,n);
    std::vector<float> rwork(5*k);

    scomplex dummyWork;
    LAPACK(cgesvd)
    ( &jobu, &jobva, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, 
      &dummyWork, &lwork, &rwork[0], &info );

    lwork = dummyWork.real;
    std::vector<scomplex> work(lwork);
    LAPACK(cgesvd)
    ( &jobu, &jobva, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, 
      &work[0], &lwork, &rwork[0], &info );
    if( info < 0 )
    {
        std::ostringstream msg;
        msg << "Argument " << -info << " had illegal value";
        throw std::logic_error( msg.str().c_str() );
    }
    else if( info > 0 )
    {
        throw std::runtime_error("cgesvd's updating process failed");
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #12
0
void SingularValues( int m, int n, dcomplex* A, int lda, double* s )
{
#ifndef RELEASE
    PushCallStack("lapack::SingularValues");
#endif
    if( m==0 || n==0 )
    {
#ifndef RELEASE
        PopCallStack();
#endif
        return;
    }

    const char jobu='N', jobva='N';
    int fakeLDim=1, lwork=-1, info;
    dcomplex dummyWork;
    const int k = std::min(m,n);
    std::vector<double> rwork(5*k);

    LAPACK(zgesvd)
    ( &jobu, &jobva, &m, &n, A, &lda, s, 0, &fakeLDim, 0, &fakeLDim, 
      &dummyWork, &lwork, &rwork[0], &info );

    lwork = dummyWork.real;
    std::vector<dcomplex> work(lwork);
    LAPACK(zgesvd)
    ( &jobu, &jobva, &m, &n, A, &lda, s, 0, &fakeLDim, 0, &fakeLDim, 
      &work[0], &lwork, &rwork[0], &info );
    if( info < 0 )
    {
        std::ostringstream msg;
        msg << "Argument " << -info << " had illegal value";
        throw std::logic_error( msg.str().c_str() );
    }
    else if( info > 0 )
    {
        throw std::runtime_error("zgesvd's updating process failed");
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #13
0
// Condition estimate: 
// Return ratio of largest/smallest singular values.
//---------------------------------------------------------
double cond(ZMat& mat)
//---------------------------------------------------------
{
  double rcond = 1.;

  ZVec  work(2*mat.num_cols(), "work");
  DVec rwork(2*mat.num_cols(), "rwork");

  int info;

  ZMat matcopy(mat);
  IVec ipiv(mat.num_rows(), "ipiv");

  ZGETRF(mat.num_rows(),
         mat.num_cols(),
         matcopy.data(),
         mat.num_rows(),
         ipiv.data(),
         info);


  //  fprintf(stdout, "zgetrf: info=%d \n", info);

  // must fix this
  double anorm = 1.0;

  ZGECON('I',
          mat.num_cols(), 
          matcopy.data(), 
          mat.num_rows(),
          anorm, 
          rcond, 
          work.data(), 
          rwork.data(), 
          info);

  //  fprintf(stdout, "zgecon: info=%d rcond=%lf\n", info, rcond);

  return 1./rcond;
}
Exemple #14
0
void matrix::diagonalize(matrix& levecs, std::vector<complex>& eigs, matrix& revecs) const
{	static StopWatch watch("matrix::diagonalizeNH");
	watch.start();
	//Prepare inputs and outputs:
	matrix A = *this; //destructible copy
	int N = A.nRows();
	myassert(N > 0);
	myassert(A.nCols()==N);
	eigs.resize(N);
	levecs.init(N, N);
	revecs.init(N, N);
	//Prepare temporaries:
	char jobz = 'V'; //compute eigenvectors and eigenvalues
	int lwork = (64+1)*N; std::vector<complex> work(lwork); //Magic number 64 obtained by running ILAENV as suggested in doc of zheevr (and taking the max over all N)
	std::vector<double> rwork(2*N);
	//Call LAPACK and check errors:
	int info=0;
	zgeev_(&jobz, &jobz, &N, A.data(), &N, eigs.data(), levecs.data(), &N, revecs.data(), &N, work.data(), &lwork, rwork.data(), &info);
	if(info<0) { logPrintf("Argument# %d to LAPACK eigenvalue routine ZGEEV is invalid.\n", -info); stackTraceExit(1); }
	if(info>0) { logPrintf("Error code %d in LAPACK eigenvalue routine ZGEEV.\n", info); stackTraceExit(1); }
	watch.stop();
}
Exemple #15
0
main(int argc, char **argv ) {

  /*
    This is the Hello World program for CPSC424/524.

    Author: Andrew Sherman, Yale University

    Date: 1/23/2017

    Credits: This program is based on a program provided by Barry Wilkinson (UNCC), which 
             had a similar communication pattern, but did not include any simulated work.
  */

  char message[100];
  int i,rank, size, type=99; 
  int worktime, sparm, rwork(int,int);
  double wct0, wct1, total_time;

  MPI_Status status;

  MPI_Init(&argc,&argv); // Required MPI initialization call

  MPI_Comm_size(MPI_COMM_WORLD,&size); // Get no. of processes
  MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Which process am I?

  /* If I am the master (rank 0) ... */
  if (rank == 0) {

    sparm = rwork(0,0); //initialize the workers' work times
    
    /* Create the message using sprintf */
    sprintf(message, "Hello, from process %d.",rank);

    MPI_Barrier(MPI_COMM_WORLD); //wait for everyone to be ready before starting timer
    wct0 = MPI_Wtime(); //set the start time

    /* Send the message to all the workers, which is where the work happens */
    for (i=1; i<size; i++) {
      MPI_Send(message, strlen(message)+1, MPI_CHAR, i, type, MPI_COMM_WORLD);
      MPI_Send(&sparm, 1, MPI_INT, i, type, MPI_COMM_WORLD);
    }

    for (i=1; i<size; i++) {
      MPI_Recv(message, 100, MPI_CHAR, i, type, MPI_COMM_WORLD, &status);
      sleep(3);
      printf("Message from process %d: %s\n", i, message);
    }

    wct1 = MPI_Wtime(); // Get total elapsed time
    total_time = wct1 - wct0;
    printf("Message printed by master: Total elapsed time is %f seconds.\n",total_time);

  }

  /* Otherwise, if I am a worker ... */
  else {
 
    MPI_Barrier(MPI_COMM_WORLD); //wait for everyone to be ready before starting
  /* Receive messages from the master */
    MPI_Recv(message, 100, MPI_CHAR, 0, type, MPI_COMM_WORLD, &status);
    MPI_Recv(&sparm, 1, MPI_INT, 0, type, MPI_COMM_WORLD, &status);

    worktime = rwork(rank,sparm); // Simulate some work

    // printf("From process %d: I worked for %d seconds after receiving the following message:\n\t %s\n",
	   // rank,worktime,message);
    sprintf(message, "Hello master, from process %d after working %d seconds", rank, worktime);
    MPI_Send(message, strlen(message)+1, MPI_CHAR, 0, type, MPI_COMM_WORLD);
  }

  MPI_Finalize(); // Required MPI termination call
}
Exemple #16
0
  void DenseLinearEigenSystem< std::complex<double> >::eigensolve_lapack_with_vectors()
  {
#ifndef LAPACK
    std::string problem;
    problem = "The DenseLinearEigenSystem::eigensolve_lapack_with_vectors method has been called\n";
    problem += "but the compiler option -DLAPACK was not provided when\n";
    problem += "the library was built.";
    throw ExceptionExternal( problem );
#else

    std::size_t N = p_A -> nrows();
    // Cache issues of varying significance plague problems of size 2^j + 2^k + ...
    // when LDA = N, so this is my shameless 'tweak' to maintain predictable
    // performance, at least for N <=1024 or so.
    int padding( 0 );
    if ( ( N % 2 == 0 ) && ( N > 127 ) )
    {
      padding = 1;
    }
#ifdef PARANOID
    if ( ( p_A -> nrows() != p_B -> nrows() ) ||
         ( p_A -> ncols() != p_B -> ncols() ) )
    {
      std::string problem( "The DenseLinearEigenSystem::eigensolve_lapack_with_vectors method has detected a failure. \n" );
      throw ExceptionGeom( problem, p_A -> nrows(), p_A -> ncols(),
                           p_B -> nrows(), p_B -> ncols() );
    }
#endif
    // transpose the input matrices so they are in column_major format
    FortranData Af( *p_A, true, padding );
    FortranData Bf( *p_B, true, padding );
    // eigenvalue storage
    DenseVector<double> alpha( 2 * N, 0.0 );
    DenseVector<double> beta( 2 * N, 0.0 );
    // eigenvector storage
    DenseVector<double> vec_left( 2, 0.0 );
    DenseVector<double> vec_right( 2 * N * N, 0.0 );
    // some workspace for the LAPACK routine
    DenseVector<double> work( 2, 0.0 );
    DenseVector<double> rwork( 8 * N, 0.0 );
    int info( 0 );
    // Call FORTRAN LAPACK to get the required workspace
    LAPACK_ZGGEV( ( char* ) "N", ( char* ) "V", N, Af.base(), N + padding, Bf.base(), N + padding, &alpha[ 0 ], &beta[ 0 ], &vec_left[ 0 ], 1, &vec_right[ 0 ], N, &work[ 0 ], -1, &rwork[ 0 ], info );
    int required_workspace = int( work[ 0 ] );
#ifdef DEBUG

    std::cout << "[DEBUG] DenseLinearEigenSystem::eigensolve_lapack_with_vectors is requesting \n";
    std::cout << "[DEBUG] a workspace vector of size " << required_workspace << "\n";
#endif

    work.resize( 2 * required_workspace );
    // call FORTRAN LAPACK again with the optimum workspace
    LAPACK_ZGGEV( ( char* ) "N", ( char* ) "V", N, Af.base(), N + padding, Bf.base(), N + padding, &alpha[ 0 ], &beta[ 0 ], &vec_left[ 0 ], 1, &vec_right[ 0 ], N, &work[ 0 ], required_workspace, &rwork[ 0 ], info );
    if ( 0 != info )
    {
      std::string problem( "The DenseLinearEigenSystem::eigensolve_lapack_with_vectors method has detected a failure.\n" );
      throw ExceptionExternal( problem, info );
    }
    // create a complex eigenvalue vector
    EIGENVALUES_ALPHA = DenseVector<D_complex>( N, 0.0 );
    EIGENVALUES_BETA = DenseVector<D_complex>( N, 0.0 );
    // complex eigenvector matrix
    ALL_EIGENVECTORS = DenseMatrix<D_complex>( N, N, 0.0 );
    // step through the eigenvalues
    for ( std::size_t i = 0; i < N; ++i )
    {
      const D_complex eye( 0.0, 1.0 );
      EIGENVALUES_ALPHA[ i ] = alpha[ 2 * i ] + alpha[ 2 * i + 1 ] * eye;
      EIGENVALUES_BETA[ i ] = beta[ 2 * i ] + beta[ 2 * i + 1 ] * eye;
      for ( std::size_t j = 0; j < N; ++j )
      {
        ALL_EIGENVECTORS( i, j ) = vec_right[ 2 * i * N + 2 * j ] + vec_right[ 2 * i * N + 2 * j + 1 ] * eye;
      }
    }
#endif

  }
Exemple #17
0
int Pseudoinverse(
	size_t m, size_t n,
	const doublecomplex *A, size_t lda,
	doublecomplex *P, size_t ldp
){
	integer info;
	CMat Acopy(Eigen::Map<const CMat,Eigen::Unaligned,Eigen::OuterStride<> >(A, m, n, Eigen::OuterStride<>(lda)));
	Eigen::Map<CMat,Eigen::Unaligned,Eigen::OuterStride<> > mP(P, n, m, Eigen::OuterStride<>(ldp));
	if(m >= n){ // tall case
		RVec S(n);
		CMat VH(n,n);
		doublecomplex dum;
		integer lwork = -1;
		RVec rwork(5*n);
		zgesvd_(
			"O","A", m,n, Acopy.data(), Acopy.outerStride(),
			S.data(), NULL, m, VH.data(), VH.outerStride(),
			&dum, lwork, rwork.data(), &info
		);
		lwork = (integer)dum.real();
		CVec work(lwork);
		zgesvd_(
			"O","A", m,n, Acopy.data(), Acopy.outerStride(),
			S.data(), NULL, m, VH.data(), VH.outerStride(),
			work.data(), lwork, rwork.data(), &info
		);
		mP = Acopy.adjoint();
		
		{
			double threshold = 2 * std::numeric_limits<double>::epsilon() * S[0];
			for(size_t i = 0; i < n; ++i){
				if(S[i] < threshold){
					break;
				}
				S[i] = 1./S[i];
			}
		}
		mP = VH.adjoint() * S.asDiagonal() * mP;
	}else{ // wide case
		RVec S(m);
		CMat U(m,m);
		doublecomplex dum;
		integer lwork = -1;
		RVec rwork(5*m);
		zgesvd_(
			"A","O", m,n, Acopy.data(), Acopy.outerStride(),
			S.data(), U.data(), U.outerStride(), NULL, m,
			&dum, lwork, rwork.data(), &info
		);
		lwork = (integer)dum.real();
		CVec work(lwork);
		zgesvd_(
			"A","O", m,n, Acopy.data(), Acopy.outerStride(),
			S.data(), U.data(), U.outerStride(), NULL, m,
			work.data(), lwork, rwork.data(), &info
		);
		mP = Acopy.adjoint();
		
		{
			double threshold = 2 * std::numeric_limits<double>::epsilon() * S[0];
			for(size_t i = 0; i < m; ++i){
				if(S[i] < threshold){
					break;
				}
				S[i] = 1./S[i];
			}
		}
		mP = mP * S.asDiagonal() * U.adjoint();
	}
	return info;
}