//=============================================================================
int Epetra_SerialDenseSVD::Factor() {

  int ierr = 0;

  ANORM_ = Matrix_->OneNorm(); // Compute 1-Norm of A

  //allocate U_, S_, and Vt_ if not already done
  if(U_==0)
  {
    U_ = new double[M_*N_];
    S_ = new double[M_];
    Vt_ = new double[M_*N_];
  }
  else //zero them out
  {
    for( int i = 0; i < M_; ++i ) S_[i]=0.0;
    for( int i = 0; i < M_*N_; ++i )
    {
      U_[i]=0.0;
      Vt_[i]=0.0;
    }
  }

//  if (Equilibrate_) ierr = EquilibrateMatrix();

  if (ierr!=0) EPETRA_CHK_ERR(ierr-2);

  //allocate temp work space
  int lwork = 5*M_;
  double *work = new double[lwork];
  char job = 'A';

  //create temporary matrix to avoid writeover of original
  Epetra_SerialDenseMatrix tempMat( *Matrix_ );
  GESVD( job, job, M_, N_, tempMat.A(), LDA_, S_, U_, N_, Vt_, M_, work, &lwork, &INFO_ );

  delete [] work;

  Factored_ = true;
  double DN = N_;
  UpdateFlops(2.0*(DN*DN*DN)/3.0);

  EPETRA_CHK_ERR(INFO_);
  return(0);
}
Example #2
0
// Computes an SVD factorization of a real MxN matrix.
// Returns the vector of singular values.
// Also, factors U, VT, where A = U * D * VT.
//---------------------------------------------------------
DVec& svd
(
  const DMat& mat,  // [in]
        DMat& U,    // [out: left singular vectors]
        DMat& VT,   // [out: right singular vectors]
        char ju,    // [in: want U?]
        char jvt    // [in: want VT?]
)
//---------------------------------------------------------
{
  // Work with a copy of the input matrix.
  DMat A(mat, OBJ_temp, "svd.TMP");

  // A(MxN)
  int m=A.num_rows(), n=A.num_cols();
  int mmn=A.min_mn(), xmn=A.max_mn();

  // resize parameters
  U.resize (m,m, true, 0.0);
  VT.resize(n,n, true, 0.0);
  DVec* s = new DVec(mmn, 0.0, OBJ_temp, "s.TMP");
  char jobu  = ju;
  char jobvt = jvt;
  int info = 0;

  // NBN: ACML does not use the work vector.
  int lwork = 2 * std::max(3*mmn+xmn, 5*mmn);
  DVec work(lwork, 0.0, OBJ_temp, "work.TMP");
  GESVD (jobu, jobvt, m, n, A.data(), m, s->data(), U.data(), m, VT.data(), n, work.data(), lwork, info);

  if (info < 0) { 
    (*s) = 0.0;
    umERROR("SVD", "Error in input argument (%d)\nNo solution computed.", -info);
  } else if (info > 0) {
    (*s) = 0.0;
    umLOG(1, "DBDSQR did not converge."
             "\n%d superdiagonals of an intermediate bidiagonal"
             "\nform B did not converge to zero.\n", info);
  }

  return (*s);
}
Example #3
0
/*
 * This function returns the solution of Ax = b
 *
 * The function is based on SVD decomposition:
 * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
 * (U D V^T) x = b or x=V D^{-1} U^T b
 * Note that V D^{-1} U^T is the pseudoinverse A^+
 *
 * A is mxm, b is mx1.
 *
 * The function returns 0 in case of error, 1 if successful
 *
 * This function is often called repetitively to solve problems of identical
 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
 * retained between calls and free'd-malloc'ed when not of the appropriate size.
 * A call with NULL as the first argument forces this memory to be released.
 */
int AX_EQ_B_SVD(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
{
__STATIC__ LM_REAL *buf=NULL;
__STATIC__ int buf_sz=0;
static LM_REAL eps=LM_CNST(-1.0);

register int i, j;
LM_REAL *a, *u, *s, *vt, *work;
int a_sz, u_sz, s_sz, vt_sz, tot_sz;
LM_REAL thresh, one_over_denom;
register LM_REAL sum;
int info, rank, worksz, *iwork, iworksz;
   
    if(!A)
#ifdef LINSOLVERS_RETAIN_MEMORY
    {
      if(buf) free(buf);
      buf=NULL;
      buf_sz=0;

      return 1;
    }
#else
      return 1; /* NOP */
#endif /* LINSOLVERS_RETAIN_MEMORY */
   
  /* calculate required memory size */
#if 1 /* use optimal size */
  worksz=-1; // workspace query. Keep in mind that GESDD requires more memory than GESVD
  /* note that optimal work size is returned in thresh */
  GESVD("A", "A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, &info);
  //GESDD("A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, NULL, &info);
  worksz=(int)thresh;
#else /* use minimum size */
  worksz=5*m; // min worksize for GESVD
  //worksz=m*(7*m+4); // min worksize for GESDD
#endif
  iworksz=8*m;
  a_sz=m*m;
  u_sz=m*m; s_sz=m; vt_sz=m*m;

  tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */

#ifdef LINSOLVERS_RETAIN_MEMORY
  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
    if(buf) free(buf); /* free previously allocated memory */

    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
      exit(1);
    }
  }
#else
    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
      exit(1);
    }
#endif /* LINSOLVERS_RETAIN_MEMORY */

  a=buf;
  u=a+a_sz;
  s=u+u_sz;
  vt=s+s_sz;
  work=vt+vt_sz;
  iwork=(int *)(work+worksz);

  /* store A (column major!) into a */
  for(i=0; i<m; i++)
    for(j=0; j<m; j++)
      a[i+j*m]=A[i*m+j];

  /* SVD decomposition of A */
  GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
  //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);

  /* error treatment */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", AX_EQ_B_SVD) "()\n", -info);
      exit(1);
    }
    else{
      fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%d]\n", info);
#ifndef LINSOLVERS_RETAIN_MEMORY
      free(buf);
#endif

      return 0;
    }
  }

  if(eps<0.0){
    LM_REAL aux;

    /* compute machine epsilon */
    for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
                                          ;
    eps*=LM_CNST(2.0);
  }

  /* compute the pseudoinverse in a */
	for(i=0; i<a_sz; i++) a[i]=0.0; /* initialize to zero */
  for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
    one_over_denom=LM_CNST(1.0)/s[rank];

    for(j=0; j<m; j++)
      for(i=0; i<m; i++)
        a[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
  }

	/* compute A^+ b in x */
	for(i=0; i<m; i++){
	  for(j=0, sum=0.0; j<m; j++)
      sum+=a[i*m+j]*B[j];
    x[i]=sum;
  }

#ifndef LINSOLVERS_RETAIN_MEMORY
  free(buf);
#endif

	return 1;
}
Example #4
0
/*
 * This function returns the solution of Ax = b
 *
 * The function is based on SVD decomposition:
 * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
 * (U D V^T) x = b or x=V D^{-1} U^T b
 * Note that V D^{-1} U^T is the pseudoinverse A^+
 *
 * A is mxm, b is mx1.
 *
 * The function returns 0 in case of error, 1 if successfull
 *
 * This function is often called repetitively to solve problems of identical
 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
 * retained between calls and free'd-malloc'ed when not of the appropriate size.
 * A call with NULL as the first argument forces this memory to be released.
 */
int AX_EQ_B_SVD(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
{
__STATIC__ LM_REAL *buf=NULL;
__STATIC__ int buf_sz=0;
static LM_REAL eps=CNST(-1.0);

register int i, j;
LM_REAL *a, *u, *s, *vt, *work;
int a_sz, u_sz, s_sz, vt_sz, tot_sz;
LM_REAL thresh, one_over_denom;
register LM_REAL sum;
blasint info, rank, worksz, *iwork, iworksz;
   
#ifdef LINSOLVERS_RETAIN_MEMORY
    if(!A){
      if(buf) free(buf);
      buf_sz=0;
      return 1;
    }
#endif /* LINSOLVERS_RETAIN_MEMORY */
   
  /* calculate required memory size */
  worksz=16*m; /* more than needed */
  iworksz=8*m;
  a_sz=m*m;
  u_sz=m*m; s_sz=m; vt_sz=m*m;

  tot_sz=iworksz*sizeof(blasint) + (a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL);

#ifdef LINSOLVERS_RETAIN_MEMORY
  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
    if(buf) free(buf); /* free previously allocated memory */

    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
      exit(1);
    }
  }
#else
    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
      exit(1);
    }
#endif /* LINSOLVERS_RETAIN_MEMORY */

  iwork=(blasint *)buf;
  a=(LM_REAL *)(iwork+iworksz);
  /* store A (column major!) into a */
  for(i=0; i<m; i++)
    for(j=0; j<m; j++)
      a[i+j*m]=A[i*m+j];

  u=a + a_sz;
  s=u+u_sz;
  vt=s+s_sz;
  work=vt+vt_sz;

  const blasint mm = m;

  /* SVD decomposition of A */
  GESVD("A", "A", (blasint *)&mm, (blasint *)&mm, a, (blasint *)&mm, s, u, (blasint *)&mm, vt, (blasint *)&mm, work, (blasint *)&worksz, &info);
  //GESDD("A", (blasint *)&mm, (blasint *)&mm, a, (blasint *)&mm, s, u, (blasint *)&mm, vt, (blasint *)&mm, work, (blasint *)&worksz, iwork, &info);

  /* error treatment */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %ld of ", GESVD), "/" GESDD) " in ", AX_EQ_B_SVD) "()\n", (long)-info);
      exit(1);
    }
    else{
      fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%ld]\n", (long)info);
#ifndef LINSOLVERS_RETAIN_MEMORY
      free(buf);
#endif

      return 0;
    }
  }

  if(eps<0.0){
    LM_REAL aux;

    /* compute machine epsilon */
    for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5))
                                          ;
    eps*=CNST(2.0);
  }

  /* compute the pseudoinverse in a */
	for(i=0; i<a_sz; i++) a[i]=0.0; /* initialize to zero */
  for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
    one_over_denom=CNST(1.0)/s[rank];

    for(j=0; j<m; j++)
      for(i=0; i<m; i++)
        a[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
  }

	/* compute A^+ b in x */
	for(i=0; i<m; i++){
	  for(j=0, sum=0.0; j<m; j++)
      sum+=a[i*m+j]*B[j];
    x[i]=sum;
  }

#ifndef LINSOLVERS_RETAIN_MEMORY
  free(buf);
#endif

	return 1;
}
void nuclear_hard_thresholding(DOUBLE *X, DOUBLE *norm, INT rank, INT M, INT N, DOUBLE *sv, \
		DOUBLE *svecsmall, DOUBLE *sveclarge, DOUBLE *work, INT lwork) {

	INT MINMN = IMIN(M, N);
	INT MAXMN = IMAX(M, N);

	INT svFlag = 0;
	if (sv == NULL) {
		sv = (DOUBLE *) malloc(MINMN * 1 * sizeof(DOUBLE));
		svFlag = 1;
	}

	INT svecsmallFlag = 0;
	if (svecsmall == NULL) {
		svecsmall = (DOUBLE *) malloc(MINMN * MINMN * sizeof(DOUBLE));
		svecsmallFlag = 1;
	}

	INT sveclargeFlag = 0;
	if (sveclarge == NULL) {
		sveclarge = (DOUBLE *) malloc(MAXMN * MINMN * sizeof(DOUBLE));
		sveclargeFlag = 1;
	}

	CHAR jobu = 'S';
	CHAR jobvt = 'S';
	DOUBLE *u;
	DOUBLE *vt;
	if (MAXMN == M) {
		u = sveclarge;
		vt = svecsmall;
	} else {
		u = svecsmall;
		vt = sveclarge;
	}
	INT GESVDM = M;
	INT GESVDN = N;
	INT GESVDLDA = M;
	INT GESVDLDU = M;
	INT GESVDLDVT = MINMN;
	INT info;

	if (lwork == -1) {
		GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, work, &lwork, &info);

		if (svFlag == 1) {
			free(sv);
		}

		if (svecsmallFlag == 1) {
			free(svecsmall);
		}

		if (sveclargeFlag == 1) {
			free(sveclarge);
		}
		return;
	}

	INT workFlag = 0;
	if (lwork == 0) {
		DOUBLE workTemp;
		lwork = -1;
		GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, &workTemp, &lwork, &info);
		if (info != 0) {
			PRINTF("Error, INFO = %d. ", info);
			ERROR("LAPACK error.");
		}

		lwork = (INT) workTemp;
		work = (DOUBLE *) malloc(lwork * 1 * sizeof(DOUBLE));
		workFlag = 1;
	}

	GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, work, &lwork, &info);
	if (info != 0) {
		PRINTF("Error, INFO = %d. ", info);
		ERROR("LAPACK error.");
	}

	INT iterMN;
	DOUBLE normtemp = 0;
	for (iterMN = 0; iterMN < rank; ++iterMN) {
		normtemp += sv[iterMN];
	}

	if (norm != NULL) {
		*norm = normtemp;
	}

	for (iterMN = rank; iterMN < MINMN; ++iterMN) {
		sv[iterMN] = 0;
	}

	/*
	 * TODO: Only multiply for singular vectors corresponding to non-zero singular values.
	 */
	if (MAXMN == M) {
		INT SCALN = M;
		INT incx = 1;
		for (iterMN = 0; iterMN < MINMN; ++iterMN) {
			SCAL(&SCALN, &sv[iterMN], &u[iterMN * M], &incx);
		}

		CHAR transa = 'N';
		CHAR transb = 'N';
		INT GEMMM = M;
		INT GEMMN = N;
		INT GEMMK = MINMN;
		DOUBLE alpha = 1;
		INT GEMMLDA = M;
		INT GEMMLDB = MINMN;
		DOUBLE beta = 0;
		INT GEMMLDC = M;

		GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, u, &GEMMLDA, vt, &GEMMLDB, &beta, X, &GEMMLDC);
	} else {
		INT SCALN = M;
		INT incx = 1;
		for (iterMN = 0; iterMN < MINMN; ++iterMN) {
			SCAL(&SCALN, &sv[iterMN], &u[iterMN * M], &incx);
		}

		CHAR transa = 'N';
		CHAR transb = 'N';
		INT GEMMM = M;
		INT GEMMN = N;
		INT GEMMK = MINMN;
		DOUBLE alpha = 1;
		INT GEMMLDA = M;
		INT GEMMLDB = MINMN;
		DOUBLE beta = 0;
		INT GEMMLDC = M;

		GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, u, &GEMMLDA, vt, &GEMMLDB, &beta, X, &GEMMLDC);
	}

	if (svFlag == 1) {
		free(sv);
	}

	if (svecsmallFlag == 1) {
		free(svecsmall);
	}

	if (sveclargeFlag == 1) {
		free(sveclarge);
	}

	if (workFlag == 1) {
		free(work);
	}
}
// TODO: Change so that adaptive depending on M and N
// TODO: Check what standard I use for copy of original data, and get rid of it
// if not necessary.
void nuclear_approx_obj_grad(DOUBLE *obj, DOUBLE *deriv, DOUBLE *X, DOUBLE *rp, \
					INT M, INT N, INT derivFlag, DOUBLE *svdVec, DOUBLE *vtMat, \
					DOUBLE *dataBuffer, DOUBLE *derivVec, DOUBLE *work, INT lwork) {

	INT MN = IMIN(M, N);

	if (lwork == -1) {
		CHAR jobu;
		CHAR jobvt;
		if (derivFlag == 1) {
			jobu = 'O';
			jobvt = 'S';
		} else {
			jobu = 'N';
			jobvt = 'N';
		}
		INT GESVDM = M;
		INT GESVDN = N;
		INT GESVDLDA = M;
		INT GESVDLDU = M;
		INT GESVDLDVT = MN;
		INT INFO;

		GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, work, &lwork, &INFO);
		if (INFO != 0) {
			PRINTF("Error, INFO = %d. ", INFO);
			ERROR("LAPACK error.");
		}
		return;
	}

	INT svdVecFlag = 0;
	if (svdVec == NULL) {
		svdVec = (DOUBLE *) MALLOC(1 * MN * sizeof(DOUBLE));
		svdVecFlag = 1;
	}

	INT derivVecFlag = 0;
	if ((derivVec == NULL) && (derivFlag == 1)) {
		derivVec = (DOUBLE *) MALLOC(1 * MN * sizeof(DOUBLE));
		derivVecFlag = 1;
	}

	INT vtMatFlag = 0;
	if ((vtMat == NULL) && (derivFlag == 1)) {
		vtMat = (DOUBLE *) MALLOC(MN * N * sizeof(DOUBLE));
		vtMatFlag = 1;
	}

	INT dataBufferFlag = 0;
	if (dataBuffer == NULL) {
		dataBuffer = (DOUBLE *) MALLOC(M * N * sizeof(DOUBLE));
		dataBufferFlag = 1;
	}

	INT workFlag = 0;
	if (work == NULL) {
		workFlag = 1;
	}

	CHAR jobu;
	CHAR jobvt;
	if (derivFlag == 1) {
		jobu = 'O';
		jobvt = 'S';
	} else {
		jobu = 'N';
		jobvt = 'N';
	}
	datacpy(dataBuffer, X, M * N);

	INT GESVDM = M;
	INT GESVDN = N;
	INT GESVDLDA = M;
	INT GESVDLDU = M;
	INT GESVDLDVT = MN;
	INT INFO;

	if (workFlag == 1) {
		lwork = -1;
		DOUBLE work_temp;
		GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, &work_temp, &lwork, &INFO);
		if (INFO != 0) {
			PRINTF("Error, INFO = %d. ", INFO);
			ERROR("LAPACK error.");
		}

		lwork = (INT) work_temp;
		work = (DOUBLE*) MALLOC(lwork * sizeof(DOUBLE));
	}

	GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, work, &lwork, &INFO);
	if (INFO != 0) {
		PRINTF("Error, INFO = %d. ", INFO);
		ERROR("LAPACK error.");
	}

	abs_smooth_obj_grad(svdVec, derivVec, svdVec, rp, MN, derivFlag);
	INT ASUMN = MN;
	INT incx = 1;
	*obj = ASUM(&ASUMN, svdVec, &incx);

	if (derivFlag == 1) {

		INT iterMN;
		INT SCALN = M;
		INT incx = 1;
		DOUBLE alpha;
		for (iterMN = 0; iterMN < MN; ++iterMN) {
			alpha = derivVec[iterMN];
			SCAL(&SCALN, &alpha, &dataBuffer[iterMN * M], &incx);
		}

		CHAR transa = 'N';
		CHAR transb = 'N';
		INT GEMMM = M;
		INT GEMMN = N;
		INT GEMMK = MN;
		alpha = 1.0;
		INT GEMMLDA = M;
		INT GEMMLDB = MN;
		DOUBLE beta = 0.0;
		INT GEMMLDC = M;
		GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, dataBuffer, &GEMMLDA, vtMat, &GEMMLDB, &beta, deriv, &GEMMLDC);
	}

	if (svdVecFlag == 1) {
		FREE(svdVec);
	}

	if (derivVecFlag == 1) {
		FREE(derivVec);
	}

	if (vtMatFlag == 1) {
		FREE(vtMat);
	}

	if (dataBufferFlag == 1) {
		FREE(dataBuffer);
	}

	if (workFlag == 1) {
		FREE(work);
	}
}
Example #7
0
/*
 * This function computes the pseudoinverse of a square matrix A
 * into B using SVD. A and B can coincide
 * 
 * The function returns 0 in case of error (e.g. A is singular),
 * the rank of A if successfull
 *
 * A, B are mxm
 *
 */
static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m)
{
LM_REAL *buf=NULL;
int buf_sz=0;
static LM_REAL eps=CNST(-1.0);

register int i, j;
LM_REAL *a, *u, *s, *vt, *work;
int a_sz, u_sz, s_sz, vt_sz, tot_sz;
LM_REAL thresh, one_over_denom;
int info, rank, worksz, *iwork, iworksz;
   
  /* calculate required memory size */
  worksz=16*m; /* more than needed */
  iworksz=8*m;
  a_sz=m*m;
  u_sz=m*m; s_sz=m; vt_sz=m*m;

  tot_sz=iworksz*sizeof(int) + (a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL);

    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n");
      exit(1);
    }

  iwork=(int *)buf;
  a=(LM_REAL *)(iwork+iworksz);
  /* store A (column major!) into a */
  for(i=0; i<m; i++)
    for(j=0; j<m; j++)
      a[i+j*m]=A[i*m+j];

  u=a + a_sz;
  s=u+u_sz;
  vt=s+s_sz;
  work=vt+vt_sz;

  /* SVD decomposition of A */
  GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
  //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);

  /* error treatment */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", LEVMAR_PSEUDOINVERSE) "()\n", -info);
      exit(1);
    }
    else{
      fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", LEVMAR_PSEUDOINVERSE) "() [info=%d]\n", info);
      free(buf);

      return 0;
    }
  }

  if(eps<0.0){
    LM_REAL aux;

    /* compute machine epsilon */
    for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5))
                                          ;
    eps*=CNST(2.0);
  }

  /* compute the pseudoinverse in B */
	for(i=0; i<a_sz; i++) B[i]=0.0; /* initialize to zero */
  for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
    one_over_denom=CNST(1.0)/s[rank];

    for(j=0; j<m; j++)
      for(i=0; i<m; i++)
        B[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
  }

  free(buf);

	return rank;
}