//============================================================================= int Epetra_SerialDenseSVD::Factor() { int ierr = 0; ANORM_ = Matrix_->OneNorm(); // Compute 1-Norm of A //allocate U_, S_, and Vt_ if not already done if(U_==0) { U_ = new double[M_*N_]; S_ = new double[M_]; Vt_ = new double[M_*N_]; } else //zero them out { for( int i = 0; i < M_; ++i ) S_[i]=0.0; for( int i = 0; i < M_*N_; ++i ) { U_[i]=0.0; Vt_[i]=0.0; } } // if (Equilibrate_) ierr = EquilibrateMatrix(); if (ierr!=0) EPETRA_CHK_ERR(ierr-2); //allocate temp work space int lwork = 5*M_; double *work = new double[lwork]; char job = 'A'; //create temporary matrix to avoid writeover of original Epetra_SerialDenseMatrix tempMat( *Matrix_ ); GESVD( job, job, M_, N_, tempMat.A(), LDA_, S_, U_, N_, Vt_, M_, work, &lwork, &INFO_ ); delete [] work; Factored_ = true; double DN = N_; UpdateFlops(2.0*(DN*DN*DN)/3.0); EPETRA_CHK_ERR(INFO_); return(0); }
// Computes an SVD factorization of a real MxN matrix. // Returns the vector of singular values. // Also, factors U, VT, where A = U * D * VT. //--------------------------------------------------------- DVec& svd ( const DMat& mat, // [in] DMat& U, // [out: left singular vectors] DMat& VT, // [out: right singular vectors] char ju, // [in: want U?] char jvt // [in: want VT?] ) //--------------------------------------------------------- { // Work with a copy of the input matrix. DMat A(mat, OBJ_temp, "svd.TMP"); // A(MxN) int m=A.num_rows(), n=A.num_cols(); int mmn=A.min_mn(), xmn=A.max_mn(); // resize parameters U.resize (m,m, true, 0.0); VT.resize(n,n, true, 0.0); DVec* s = new DVec(mmn, 0.0, OBJ_temp, "s.TMP"); char jobu = ju; char jobvt = jvt; int info = 0; // NBN: ACML does not use the work vector. int lwork = 2 * std::max(3*mmn+xmn, 5*mmn); DVec work(lwork, 0.0, OBJ_temp, "work.TMP"); GESVD (jobu, jobvt, m, n, A.data(), m, s->data(), U.data(), m, VT.data(), n, work.data(), lwork, info); if (info < 0) { (*s) = 0.0; umERROR("SVD", "Error in input argument (%d)\nNo solution computed.", -info); } else if (info > 0) { (*s) = 0.0; umLOG(1, "DBDSQR did not converge." "\n%d superdiagonals of an intermediate bidiagonal" "\nform B did not converge to zero.\n", info); } return (*s); }
/* * This function returns the solution of Ax = b * * The function is based on SVD decomposition: * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes * (U D V^T) x = b or x=V D^{-1} U^T b * Note that V D^{-1} U^T is the pseudoinverse A^+ * * A is mxm, b is mx1. * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_SVD(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; static LM_REAL eps=LM_CNST(-1.0); register int i, j; LM_REAL *a, *u, *s, *vt, *work; int a_sz, u_sz, s_sz, vt_sz, tot_sz; LM_REAL thresh, one_over_denom; register LM_REAL sum; int info, rank, worksz, *iwork, iworksz; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ #if 1 /* use optimal size */ worksz=-1; // workspace query. Keep in mind that GESDD requires more memory than GESVD /* note that optimal work size is returned in thresh */ GESVD("A", "A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, &info); //GESDD("A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, NULL, &info); worksz=(int)thresh; #else /* use minimum size */ worksz=5*m; // min worksize for GESVD //worksz=m*(7*m+4); // min worksize for GESDD #endif iworksz=8*m; a_sz=m*m; u_sz=m*m; s_sz=m; vt_sz=m*m; tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */ #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; u=a+a_sz; s=u+u_sz; vt=s+s_sz; work=vt+vt_sz; iwork=(int *)(work+worksz); /* store A (column major!) into a */ for(i=0; i<m; i++) for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; /* SVD decomposition of A */ GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info); //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", AX_EQ_B_SVD) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%d]\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } if(eps<0.0){ LM_REAL aux; /* compute machine epsilon */ for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5)) ; eps*=LM_CNST(2.0); } /* compute the pseudoinverse in a */ for(i=0; i<a_sz; i++) a[i]=0.0; /* initialize to zero */ for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){ one_over_denom=LM_CNST(1.0)/s[rank]; for(j=0; j<m; j++) for(i=0; i<m; i++) a[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom; } /* compute A^+ b in x */ for(i=0; i<m; i++){ for(j=0, sum=0.0; j<m; j++) sum+=a[i*m+j]*B[j]; x[i]=sum; } #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }
/* * This function returns the solution of Ax = b * * The function is based on SVD decomposition: * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes * (U D V^T) x = b or x=V D^{-1} U^T b * Note that V D^{-1} U^T is the pseudoinverse A^+ * * A is mxm, b is mx1. * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_SVD(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; static LM_REAL eps=CNST(-1.0); register int i, j; LM_REAL *a, *u, *s, *vt, *work; int a_sz, u_sz, s_sz, vt_sz, tot_sz; LM_REAL thresh, one_over_denom; register LM_REAL sum; blasint info, rank, worksz, *iwork, iworksz; #ifdef LINSOLVERS_RETAIN_MEMORY if(!A){ if(buf) free(buf); buf_sz=0; return 1; } #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ worksz=16*m; /* more than needed */ iworksz=8*m; a_sz=m*m; u_sz=m*m; s_sz=m; vt_sz=m*m; tot_sz=iworksz*sizeof(blasint) + (a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL); #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ iwork=(blasint *)buf; a=(LM_REAL *)(iwork+iworksz); /* store A (column major!) into a */ for(i=0; i<m; i++) for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; u=a + a_sz; s=u+u_sz; vt=s+s_sz; work=vt+vt_sz; const blasint mm = m; /* SVD decomposition of A */ GESVD("A", "A", (blasint *)&mm, (blasint *)&mm, a, (blasint *)&mm, s, u, (blasint *)&mm, vt, (blasint *)&mm, work, (blasint *)&worksz, &info); //GESDD("A", (blasint *)&mm, (blasint *)&mm, a, (blasint *)&mm, s, u, (blasint *)&mm, vt, (blasint *)&mm, work, (blasint *)&worksz, iwork, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %ld of ", GESVD), "/" GESDD) " in ", AX_EQ_B_SVD) "()\n", (long)-info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%ld]\n", (long)info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } if(eps<0.0){ LM_REAL aux; /* compute machine epsilon */ for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5)) ; eps*=CNST(2.0); } /* compute the pseudoinverse in a */ for(i=0; i<a_sz; i++) a[i]=0.0; /* initialize to zero */ for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){ one_over_denom=CNST(1.0)/s[rank]; for(j=0; j<m; j++) for(i=0; i<m; i++) a[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom; } /* compute A^+ b in x */ for(i=0; i<m; i++){ for(j=0, sum=0.0; j<m; j++) sum+=a[i*m+j]*B[j]; x[i]=sum; } #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }
void nuclear_hard_thresholding(DOUBLE *X, DOUBLE *norm, INT rank, INT M, INT N, DOUBLE *sv, \ DOUBLE *svecsmall, DOUBLE *sveclarge, DOUBLE *work, INT lwork) { INT MINMN = IMIN(M, N); INT MAXMN = IMAX(M, N); INT svFlag = 0; if (sv == NULL) { sv = (DOUBLE *) malloc(MINMN * 1 * sizeof(DOUBLE)); svFlag = 1; } INT svecsmallFlag = 0; if (svecsmall == NULL) { svecsmall = (DOUBLE *) malloc(MINMN * MINMN * sizeof(DOUBLE)); svecsmallFlag = 1; } INT sveclargeFlag = 0; if (sveclarge == NULL) { sveclarge = (DOUBLE *) malloc(MAXMN * MINMN * sizeof(DOUBLE)); sveclargeFlag = 1; } CHAR jobu = 'S'; CHAR jobvt = 'S'; DOUBLE *u; DOUBLE *vt; if (MAXMN == M) { u = sveclarge; vt = svecsmall; } else { u = svecsmall; vt = sveclarge; } INT GESVDM = M; INT GESVDN = N; INT GESVDLDA = M; INT GESVDLDU = M; INT GESVDLDVT = MINMN; INT info; if (lwork == -1) { GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, work, &lwork, &info); if (svFlag == 1) { free(sv); } if (svecsmallFlag == 1) { free(svecsmall); } if (sveclargeFlag == 1) { free(sveclarge); } return; } INT workFlag = 0; if (lwork == 0) { DOUBLE workTemp; lwork = -1; GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, &workTemp, &lwork, &info); if (info != 0) { PRINTF("Error, INFO = %d. ", info); ERROR("LAPACK error."); } lwork = (INT) workTemp; work = (DOUBLE *) malloc(lwork * 1 * sizeof(DOUBLE)); workFlag = 1; } GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, work, &lwork, &info); if (info != 0) { PRINTF("Error, INFO = %d. ", info); ERROR("LAPACK error."); } INT iterMN; DOUBLE normtemp = 0; for (iterMN = 0; iterMN < rank; ++iterMN) { normtemp += sv[iterMN]; } if (norm != NULL) { *norm = normtemp; } for (iterMN = rank; iterMN < MINMN; ++iterMN) { sv[iterMN] = 0; } /* * TODO: Only multiply for singular vectors corresponding to non-zero singular values. */ if (MAXMN == M) { INT SCALN = M; INT incx = 1; for (iterMN = 0; iterMN < MINMN; ++iterMN) { SCAL(&SCALN, &sv[iterMN], &u[iterMN * M], &incx); } CHAR transa = 'N'; CHAR transb = 'N'; INT GEMMM = M; INT GEMMN = N; INT GEMMK = MINMN; DOUBLE alpha = 1; INT GEMMLDA = M; INT GEMMLDB = MINMN; DOUBLE beta = 0; INT GEMMLDC = M; GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, u, &GEMMLDA, vt, &GEMMLDB, &beta, X, &GEMMLDC); } else { INT SCALN = M; INT incx = 1; for (iterMN = 0; iterMN < MINMN; ++iterMN) { SCAL(&SCALN, &sv[iterMN], &u[iterMN * M], &incx); } CHAR transa = 'N'; CHAR transb = 'N'; INT GEMMM = M; INT GEMMN = N; INT GEMMK = MINMN; DOUBLE alpha = 1; INT GEMMLDA = M; INT GEMMLDB = MINMN; DOUBLE beta = 0; INT GEMMLDC = M; GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, u, &GEMMLDA, vt, &GEMMLDB, &beta, X, &GEMMLDC); } if (svFlag == 1) { free(sv); } if (svecsmallFlag == 1) { free(svecsmall); } if (sveclargeFlag == 1) { free(sveclarge); } if (workFlag == 1) { free(work); } }
// TODO: Change so that adaptive depending on M and N // TODO: Check what standard I use for copy of original data, and get rid of it // if not necessary. void nuclear_approx_obj_grad(DOUBLE *obj, DOUBLE *deriv, DOUBLE *X, DOUBLE *rp, \ INT M, INT N, INT derivFlag, DOUBLE *svdVec, DOUBLE *vtMat, \ DOUBLE *dataBuffer, DOUBLE *derivVec, DOUBLE *work, INT lwork) { INT MN = IMIN(M, N); if (lwork == -1) { CHAR jobu; CHAR jobvt; if (derivFlag == 1) { jobu = 'O'; jobvt = 'S'; } else { jobu = 'N'; jobvt = 'N'; } INT GESVDM = M; INT GESVDN = N; INT GESVDLDA = M; INT GESVDLDU = M; INT GESVDLDVT = MN; INT INFO; GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, work, &lwork, &INFO); if (INFO != 0) { PRINTF("Error, INFO = %d. ", INFO); ERROR("LAPACK error."); } return; } INT svdVecFlag = 0; if (svdVec == NULL) { svdVec = (DOUBLE *) MALLOC(1 * MN * sizeof(DOUBLE)); svdVecFlag = 1; } INT derivVecFlag = 0; if ((derivVec == NULL) && (derivFlag == 1)) { derivVec = (DOUBLE *) MALLOC(1 * MN * sizeof(DOUBLE)); derivVecFlag = 1; } INT vtMatFlag = 0; if ((vtMat == NULL) && (derivFlag == 1)) { vtMat = (DOUBLE *) MALLOC(MN * N * sizeof(DOUBLE)); vtMatFlag = 1; } INT dataBufferFlag = 0; if (dataBuffer == NULL) { dataBuffer = (DOUBLE *) MALLOC(M * N * sizeof(DOUBLE)); dataBufferFlag = 1; } INT workFlag = 0; if (work == NULL) { workFlag = 1; } CHAR jobu; CHAR jobvt; if (derivFlag == 1) { jobu = 'O'; jobvt = 'S'; } else { jobu = 'N'; jobvt = 'N'; } datacpy(dataBuffer, X, M * N); INT GESVDM = M; INT GESVDN = N; INT GESVDLDA = M; INT GESVDLDU = M; INT GESVDLDVT = MN; INT INFO; if (workFlag == 1) { lwork = -1; DOUBLE work_temp; GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, &work_temp, &lwork, &INFO); if (INFO != 0) { PRINTF("Error, INFO = %d. ", INFO); ERROR("LAPACK error."); } lwork = (INT) work_temp; work = (DOUBLE*) MALLOC(lwork * sizeof(DOUBLE)); } GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, work, &lwork, &INFO); if (INFO != 0) { PRINTF("Error, INFO = %d. ", INFO); ERROR("LAPACK error."); } abs_smooth_obj_grad(svdVec, derivVec, svdVec, rp, MN, derivFlag); INT ASUMN = MN; INT incx = 1; *obj = ASUM(&ASUMN, svdVec, &incx); if (derivFlag == 1) { INT iterMN; INT SCALN = M; INT incx = 1; DOUBLE alpha; for (iterMN = 0; iterMN < MN; ++iterMN) { alpha = derivVec[iterMN]; SCAL(&SCALN, &alpha, &dataBuffer[iterMN * M], &incx); } CHAR transa = 'N'; CHAR transb = 'N'; INT GEMMM = M; INT GEMMN = N; INT GEMMK = MN; alpha = 1.0; INT GEMMLDA = M; INT GEMMLDB = MN; DOUBLE beta = 0.0; INT GEMMLDC = M; GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, dataBuffer, &GEMMLDA, vtMat, &GEMMLDB, &beta, deriv, &GEMMLDC); } if (svdVecFlag == 1) { FREE(svdVec); } if (derivVecFlag == 1) { FREE(derivVec); } if (vtMatFlag == 1) { FREE(vtMat); } if (dataBufferFlag == 1) { FREE(dataBuffer); } if (workFlag == 1) { FREE(work); } }
/* * This function computes the pseudoinverse of a square matrix A * into B using SVD. A and B can coincide * * The function returns 0 in case of error (e.g. A is singular), * the rank of A if successfull * * A, B are mxm * */ static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m) { LM_REAL *buf=NULL; int buf_sz=0; static LM_REAL eps=CNST(-1.0); register int i, j; LM_REAL *a, *u, *s, *vt, *work; int a_sz, u_sz, s_sz, vt_sz, tot_sz; LM_REAL thresh, one_over_denom; int info, rank, worksz, *iwork, iworksz; /* calculate required memory size */ worksz=16*m; /* more than needed */ iworksz=8*m; a_sz=m*m; u_sz=m*m; s_sz=m; vt_sz=m*m; tot_sz=iworksz*sizeof(int) + (a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL); buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n"); exit(1); } iwork=(int *)buf; a=(LM_REAL *)(iwork+iworksz); /* store A (column major!) into a */ for(i=0; i<m; i++) for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; u=a + a_sz; s=u+u_sz; vt=s+s_sz; work=vt+vt_sz; /* SVD decomposition of A */ GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info); //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", LEVMAR_PSEUDOINVERSE) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", LEVMAR_PSEUDOINVERSE) "() [info=%d]\n", info); free(buf); return 0; } } if(eps<0.0){ LM_REAL aux; /* compute machine epsilon */ for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5)) ; eps*=CNST(2.0); } /* compute the pseudoinverse in B */ for(i=0; i<a_sz; i++) B[i]=0.0; /* initialize to zero */ for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){ one_over_denom=CNST(1.0)/s[rank]; for(j=0; j<m; j++) for(i=0; i<m; i++) B[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom; } free(buf); return rank; }