int eigen_decomposition(int n, double* X, double *eigvec, double *eigval) { /* This function calculates the eigenvalues and eigenvectors of the n*n symmetric matrix X. The matrices have to be in Fortran vector format. The eigenvectors will be put columnwise in the n*n matrix eigvec, where the corresponding eigenvalues will be put in the vector eigval (length n of course). Only the lower triangle of the matrix X is used. The content of X is not changed. This function first queries the Lapack routines for optimal workspace sizes. These memoryblocks are then allocated and the decomposition is calculated using the Lapack function "dsyevr". The allocated memory is then freed. */ double *WORK, *Xc; int *ISUPPZ, *IWORK; int numeig, info, sizeWORK, sizeIWORK; if (check_input(X, eigvec, "eigen_decomposition")) return 1; /* Use a copy of X so we don't need to change its value or use its memoryblock */ Xc=(double*) malloc(n*n*sizeof(double)); /* The support of the eigenvectors. We will not use this but the routine needs it */ ISUPPZ = (int*) malloc (2*n*sizeof(int)); /* Allocate temporarily minimally allowed size for workspace arrays */ WORK = (double*) malloc (26*n*sizeof(double)); IWORK = (int*) malloc (10*n*sizeof(int)); /* Check for NULL-pointers. */ if ((Xc==NULL)||(ISUPPZ==NULL)||(WORK==NULL)||(IWORK==NULL)) { printf("malloc failed in eigen_decomposition\n"); return 2; } vector_copy(n*n, X, Xc); /* Query the Lapack routine for optimal sizes for workspace arrays */ info=dsyevr ('V', 'A', 'L', n, Xc, n, 0, 0, 0, 0, dlamch('S'), &numeig, eigval, eigvec, n, ISUPPZ, WORK, -1, IWORK, -1); sizeWORK = (int)WORK[0]; sizeIWORK = IWORK[0]; /* Free previous allocation and reallocate preferable workspaces, Check result */ free(WORK);free(IWORK); WORK = (double*) malloc (sizeWORK*sizeof(double)); IWORK = (int*) malloc (sizeIWORK*sizeof(int)); if ((WORK==NULL)||(IWORK==NULL)) { printf("malloc failed in eigen_decomposition\n"); return 2; } /* Now calculate the eigenvalues and vectors using optimal workspaces */ info=dsyevr ('V', 'A', 'L', n, Xc, n, 0, 0, 0, 0, dlamch('S'), &numeig, eigval, eigvec, n, ISUPPZ, WORK, sizeWORK, IWORK, sizeIWORK); /* Cleanup and exit */ free(WORK); free(IWORK); free(ISUPPZ); free(Xc); return info; }
void EigenSystemSolverRealSymmetricMatrix(const Array2 <doublevar > & Ain, Array1 <doublevar> & evals, Array2 <doublevar> & evecs){ //returns eigenvalues from largest to lowest and //eigenvectors, where for i-th eigenvalue, the eigenvector components are evecs(*,i) #ifdef USE_LAPACK //if LAPACK int N=Ain.dim[0]; Array2 <doublevar> Ain2(N,N); //need to copy the array!! Ain2=Ain; /* allocate and initialise the matrix */ Array1 <doublevar> W, Z, WORK; Array1 <int> ISUPPZ, IWORK; int M; /* allocate space for the output parameters and workspace arrays */ W.Resize(N); Z.Resize(N*N); ISUPPZ.Resize(2*N); WORK.Resize(26*N); IWORK.Resize(10*N); int info; /* get the eigenvalues and eigenvectors */ info=dsyevr('V', 'A', 'L', N, Ain2.v, N, 0.0, 0.0, 0, 0, dlamch('S'), &M, W.v, Z.v, N, ISUPPZ.v, WORK.v, 26*N, IWORK.v, 10*N); if(info>0) error("Internal error in the LAPACK routine dsyevr"); if(info<0) error("Problem with the input parameter of LAPACK routine dsyevr in position "-info); for (int i=0; i<N; i++) evals(i)=W[N-1-i]; for (int i=0; i<N; i++) { for (int j=0; j<N; j++) { evecs(j,i)=Z[j+(N-1-i)*N]; } } //END OF LAPACK #else //IF NO LAPACK const int n = Ain.dim[0]; Array2 < dcomplex > Ain_complex(n,n); Array2 <dcomplex> evecs_complex(n,n); for (int i=0; i < n; i++) for (int j=0; j < n; j++) { Ain_complex(i,j)=dcomplex(Ain(i,j),0.0); } Jacobi(Ain_complex, evals, evecs_complex); for (int i=0; i < n; i++) for (int j=0; j < n; j++){ evecs(i,j)=real(evecs_complex(i,j)); } #endif //END OF NO LAPACK }
double nmf_neals(double * a, double * w0, double * h0, int * pm, int * pn, \ int * pk, int * maxiter, const double * pTolX, const double * pTolFun) { // code added to be able to call from R int m = * pm; int n = * pn; int k = * pk; const double TolX = * pTolX; const double TolFun = * pTolFun; // also: changed w0, h0 to simple pointer (instead of double) // // end code added #ifdef PROFILE_NMF_NEALS struct timeval start, end; gettimeofday(&start, 0); #endif #if DEBUG_LEVEL >= 2 printf("Entering nmf_neals\n"); #endif #ifdef ERROR_CHECKING errno = 0; #endif double * help1 = (double*) malloc(sizeof(double)*k*k); double * help2 = (double*) malloc(sizeof(double)*k*n); double * help3 = (double*) malloc(sizeof(double)*k*m); //----------------------------------------- // definition of necessary dynamic data structures //...for calculating matrix h double* h = (double*) malloc(sizeof(double)*k*n); int* jpvt_h = (int*) malloc(sizeof(int)*k); int info; //...for calculating matrix w double* w = (double*) malloc(sizeof(double)*m*k); //---------------- //...for calculating the norm of A-W*H double* d = (double*) malloc(sizeof(double)*m*n); //d = a - w*h double dnorm0 = 0; double dnorm = 0; const double eps = dlamch('E'); //machine precision epsilon const double sqrteps = sqrt(eps); //squareroot of epsilon //------------------- #ifdef ERROR_CHECKING if (errno) { perror("Error allocating memory in nmf_neals"); free(help1); free(help2); free(help3); free(h); free(jpvt_h); free(w); free(d); return -1; } #endif // declaration of data structures for switch to als algorithm // ---------------------------------------------------------- int als_data_allocated = 0; // indicates wheter data structures were already allocated // factor matrices for factorizing matrix w double * q; double * r; // factor matrices for factorizing matrix h double * q_h; double * r_h; double* tau_h; //stores elementary reflectors of factor matrix Q double* work_w; //work array for factorization of matrix w int lwork_w; double* work_h; //work array for factorization of matrix h int lwork_h; double * work_qta; //work array for dorgqr int lwork_qta; double * work_qth; //work array for dorgqr int lwork_qth; //query for optimal workspace size for routine dgeqp3... double querysize; //Loop-Indices int iter, i; //variable for storing if fallback happened in current iteration int fallback; // factorisation step in a loop from 1 to maxiter for (iter = 1; iter <= *maxiter; ++iter) { //no fallback in this iteration so far fallback = 0; // calculating matrix h //---------------- //help1 = w0'*w0 dgemm('T', 'N', k, k, m, 1.0, w0, m, w0, m, 0., help1, k); //help2 = w0'*a dgemm('T', 'N', k, n, m, 1.0, w0, m, a, m, 0., help2, k); //LU-Factorisation of help1 to solve equation help1 * x = help2 dgesv(k, n, help1, k, jpvt_h, help2, k, &info); // if factor matrix U is singular -> switch back to als algorithm to compute h if( info > 0) { //set fallback to 1 to indicate that fallback happened fallback = 1; // do dynamic data structures need to be allocated? if (!als_data_allocated) { als_data_allocated = 1; // factor matrices for factorizing matrix w q = (double*) malloc(sizeof(double)*m*k); r = (double*) malloc(sizeof(double)*m*k); // factor matrices for factorizing matrix h q_h = (double*) malloc(sizeof(double)*n*k); r_h = (double*) malloc(sizeof(double)*n*k); tau_h = (double*) malloc(sizeof(double)*k); //stores elementary reflectors of factor matrix Q //query for optimal workspace size for routine dgeqp3... //for matrix w dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info); lwork_w = (int) querysize; work_w = (double*) malloc(sizeof(double)*lwork_w); //work array for factorization of matrix help1 (dgeqp3) //for matrix h dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info); lwork_h = (int) querysize; work_h = (double*) malloc(sizeof(double)*lwork_h); //work array for factorization of matrix h //query for optimal workspace size for routine dorgqr... //for matrix w dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info); lwork_qta = (int)querysize; work_qta = (double*) malloc(sizeof(double)*lwork_qta); //work array for dorgqr //for matrix h dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info); lwork_qth = (int)querysize; work_qth = (double*) malloc(sizeof(double)*lwork_qth); } // calculating matrix h //---------------- //re-initialization //copy *w0 to q dlacpy('A', m, k, w0, m, q, m); //initialise jpvt_h to 0 -> every column free for (i = 0; i<k; ++i) jpvt_h[i] = 0; // Q-R factorization with column pivoting dgeqp3(m, k, q, m, jpvt_h, tau_h, work_w, lwork_w, &info); //copying upper triangular factor-matrix r out of q into r dlacpy('U', m, k, q, m, r, k); //Begin of least-squares-solution to w0 * x = a //generate explicit matrix q (m times k) and calculate q' * a dorgqr(m, k, k, q, m, tau_h, work_qta, lwork_qta, &info); dgemm('T', 'N', k, n, m, 1.0, q, m, a, m, 0.0, q_h, k); //solve R * x = (Q'*A) dtrtrs('U','N','N',k,n,r,k,q_h,k,&info); //copy matrix q to h, but permutated according to jpvt_h for (i=0; i<k; ++i) { dcopy(n, q_h + i, k, h + jpvt_h[i] - 1, k); } //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint for (i=0; i<k*n; ++i) { if (h[i] < ZERO_THRESHOLD) h[i] = 0.; } } else { //h = max(ZERO_THRESHOLD, help1\help2) for (i=0; i < k*n; ++i) h[i] = (help2[i] > ZERO_THRESHOLD ? help2[i] : 0.); } // calculating matrix w = max(0, help1\help3)' //---------------------------- //help1 = h*h' dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., help1, k); //help3 = h*a' dgemm('N', 'T', k, m, n, 1.0, h, k, a, m, 0., help3, k); //LU-Factorisation of help1 dgesv(k, m, help1, k, jpvt_h, help3, k, &info); // if( info > 0) { // do dynamic data structures need to be allocated? if (!als_data_allocated) { als_data_allocated = 1; // factor matrices for factorizing matrix w q = (double*) malloc(sizeof(double)*m*k); r = (double*) malloc(sizeof(double)*m*k); // factor matrices for factorizing matrix h q_h = (double*) malloc(sizeof(double)*n*k); r_h = (double*) malloc(sizeof(double)*n*k); tau_h = (double*) malloc(sizeof(double)*k); //stores elementary reflectors of factor matrix Q //query for optimal workspace size for routine dgeqp3... //for matrix w dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info); lwork_w = (int) querysize; work_w = (double*) malloc(sizeof(double)*lwork_w); //work array for factorization of matrix help1 (dgeqp3) //..for matrix h dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info); lwork_h = (int) querysize; work_h = (double*) malloc(sizeof(double)*lwork_h); //work array for factorization of matrix h //query for optimal workspace size for routine dorgqr... //for matrix w dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info); lwork_qta = (int)querysize; work_qta = (double*) malloc(sizeof(double)*lwork_qta); //work array for dorgqr // ... for matrix h dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info); lwork_qth = (int)querysize; work_qth = (double*) malloc(sizeof(double)*lwork_qth); } //calculating matrix w //copy original matrix h to q_h, but transposed for (i=0; i<k; ++i) { dcopy(n, h + i, k, q_h + i*n, 1); } //initialise jpvt_a to 0 -> every column free for (i = 0; i<k; ++i) jpvt_h[i] = 0; //Q-R factorization dgeqp3(n, k, q_h, n, jpvt_h, tau_h, work_h, lwork_h, &info); //copying upper triangular factor-matrix r_h out of q into r_h dlacpy('U', n, k, q_h, n, r_h, k); //Begin of least-squares-solution to w0 * x = a //generate explicit matrix q (n times k) and calculate *a = q' * a' dorgqr(n, k, k, q_h, n, tau_h, work_qth, lwork_qth, &info); dgemm('T', 'T', k, m, n, 1.0, q_h, n, a, m, 0.0, q, k); //solve R_h * x = (Q'*A') dtrtrs('U', 'N', 'N', k, m, r_h, k, q, k, &info); //jpvt_h*(R\(Q'*A')) permutation and transposed copy to w for (i=0; i<k; ++i) { dcopy(m, q + i, k, w + m * (jpvt_h[i] - 1), 1); } //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint for (i=0; i<k*m; ++i) { if (w[i] < ZERO_THRESHOLD) w[i] = 0.; } } else { //w = max(0, help3)' for (i=0; i<k; ++i) { dcopy(m, help3 + i, k, w + i*m, 1); } for (i=0; i<m*k; ++i) { if (w[i] < ZERO_THRESHOLD) w[i] = 0.; } } // calculating the norm of D = A-W*H dnorm = calculateNorm(a, w, h, d, m, n, k); // calculating change in w -> dw //---------------------------------- double dw; dw = calculateMaxchange(w, w0, m, k, sqrteps); // calculating change in h -> dh //----------------------------------- double dh; dh = calculateMaxchange(h, h0, k, n, sqrteps); //Max-Change = max(dh, dw) = delta double delta; delta = (dh > dw) ? dh : dw; // storing the matrix results of the current iteration swap(&w0, &w); swap(&h0, &h); // storing the norm results of the current iteration dnorm0 = dnorm; #if DEBUG_LEVEL >= 1 printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta); #endif //Check for Convergence if (iter > 1) { if (delta < TolX) { *maxiter = iter; break; } else if (dnorm <= TolFun*dnorm0) { *maxiter = iter; break; } } } //end of loop from 1 to maxiter #if DEBUG_LEVEL >= 2 printf("Exiting nmf_neals\n"); #endif #ifdef PROFILE_NMF_NEALS gettimeofday(&end, 0); outputTiming("", start, end); #endif // freeing memory if used free(help1); free(help2); free(help3); free(h); free(jpvt_h); free(w); free(d); if(als_data_allocated) { free(q); free(r); free(q_h); free(r_h); free(work_h); free(work_w); free(tau_h); free(work_qta); free(work_qth); } // returning calculated norm return dnorm; }
double nmf_mu(double * a, double ** w0, double ** h0, int m, int n, \ int k, int * maxiter, const double TolX, const double TolFun) { #ifdef PROFILE_NMF_MU struct timeval start, end; gettimeofday(&start, 0); #endif #if DEBUG_LEVEL >= 2 printf("Entering nmf_mu\n"); #endif #ifdef ERROR_CHECKING errno = 0; #endif // definition of necessary dynamic data structures //...for calculating matrix h double* numerh = (double*) malloc(sizeof(double) *k*n); double* work1 = (double*) malloc(sizeof(double)*k*k); // used for calculation of h & w double* work2 = (double*) malloc(sizeof(double)*k*n); double* h = (double*) malloc(sizeof(double)*k*n); //---------------- //...for calculating matrix w double* numerw = (double*) malloc(sizeof(double)*m*k); double* work2w = (double*) malloc(sizeof(double)*m*k); double* w = (double*) malloc(sizeof(double)*m*k); //----------------- //...for calculating the norm of A-W*H double* d = (double*) malloc(sizeof(double)*m*n); //d = a - w*h double dnorm0 = 0.; double dnorm = 0.; const double eps = dlamch('E'); //machine precision epsilon const double sqrteps = sqrt(eps); //squareroot of epsilon //------------------- #ifdef ERROR_CHECKING if (errno) { perror("Error allocating memory in nmf_mu"); free(numerh); free(numerw); free(work1); free(work2); free(h); free(w); free(work2w); free(d); return -1; } #endif //Is ZERO_THRESHOLD _not_ defined then use machine epsilon as default #ifndef ZERO_THRESHOLD const double ZERO_THRESHOLD = eps; #endif //Loop-Indices int iter, i; // factorisation step in a loop from 1 to maxiter for (iter = 1; iter <= *maxiter; ++iter) { // calculating matrix h //---------------- // calculating numerh = w0'*a dgemm('T', 'N', k, n, m, 1.0, *w0, m, a, m, 0., numerh, k); // calculating first intermediate result work1 = w0'*w0 dgemm('T', 'N', k, k, m, 1.0, *w0, m, *w0, m, 0., work1, k); // calculating second intermediate result work2 = work1 * h0 dgemm('N', 'N', k, n, k, 1.0, work1, k, *h0, k, 0., work2, k); //calculating elementwise matrixmultiplication, Division and addition h = h0 .* (numerh ./(work2 + eps)) //set elements < zero_threshold to zero double tmp_element; for(i = 0; i< k*n; ++i) { if ( (*h0)[i] == 0. || numerh[i] == 0.) h[i] = 0.; else { tmp_element = (*h0)[i] * (numerh[i] / (work2[i] + DIV_BY_ZERO_AVOIDANCE)); h[i] = (tmp_element < ZERO_THRESHOLD) ? 0. : tmp_element; } } // calculating matrix w //---------------------------- // calculating numerw = a*h' dgemm('N', 'T', m, k, n, 1.0, a, m, h, k, 0., numerw, m); // calculating first intermediate result work1 = h*h' (kxk-Matrix) => re-use of work1 dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., work1, k); // calculating second intermediate result work2w = w0 * work1 dgemm('N', 'N', m, k, k, 1.0, *w0, m, work1, k, 0., work2w, m); //calculating elementwise matrixmultiplication, Division and addition w = w0 .* (numerw ./ (work2w + eps)) //set elements < zero_threshold to zero for(i = 0; i < m*k; ++i) { if ( (*w0)[i] == 0. || numerw[i] == 0.) w[i] = 0.; else { tmp_element = (*w0)[i] * (numerw[i] / (work2w[i] + DIV_BY_ZERO_AVOIDANCE)); w[i] = (tmp_element < ZERO_THRESHOLD) ? 0. : tmp_element; } } // calculating the norm of D = A-W*H dnorm = calculateNorm(a, w, h, d, m, n, k); // calculating change in w -> dw //---------------------------------- double dw = calculateMaxchange(w, *w0, m, k, sqrteps); // calculating change in h -> dh //----------------------------------- double dh = calculateMaxchange(h, *h0, k, n, sqrteps); //Max-Change = max(dh, dw) = delta double delta = 0.0; delta = (dh > dw) ? dh : dw; // storing the matrix results of the current iteration in W0 respectively H0 swap(w0, &w); swap(h0, &h); //Check for Convergence if (iter > 1) { if (delta < TolX) { *maxiter = iter; break; } else if (dnorm <= TolFun*dnorm0) { *maxiter = iter; break; } } // storing the norm result of the current iteration dnorm0 = dnorm; #if DEBUG_LEVEL >= 1 printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta); #endif } //end of loop from 1 to maxiter #if DEBUG_LEVEL >= 2 printf("Exiting nmf_mu\n"); #endif #ifdef PROFILE_NMF_MU gettimeofday(&end, 0); outputTiming("", start, end); #endif // freeing memory if used free(numerh); free(numerw); free(work1); free(work2); free(h); free(w); free(work2w); free(d); // returning calculated norm return dnorm; }