void storeMatrix(const char *fileName, const int m, const int n, const double *matrix) { #ifdef PROFILE_STORE_MATRIX struct timeval start, end; gettimeofday(&start, 0); #endif #if DEBUG_LEVEL >= 2 printf("Entering storeMatrix\n"); #endif #ifdef ERROR_CHECKING errno = 0; //no error occured so far #endif int i, j; FILE *file = fopen(fileName, "w"); #ifdef ERROR_CHECKING if (errno) { perror(fileName); return; } #endif fprintf(file, "%d\n", m); fprintf(file, "%d\n", n); for(i = 0; i < m; i++) { for(j = 0; j < n; j++) { fprintf(file, "%.15e ", matrix[i + j * m]); } fprintf(file, "\n"); } fclose(file); #if DEBUG_LEVEL >= 2 printf("Exiting storeMatrix\n"); #endif #ifdef PROFILE_STORE_MATRIX gettimeofday(&end, 0); outputTiming("Timing:", start, end); #endif }
void nmfDriver(const char* a, const int k, int iter, const char* w0, const char* h0, alg_t alg, options_t * opts) { #ifdef PROFILE_NMF_DRIVER struct timeval start, end; gettimeofday(&start, 0); #endif #if DEBUG_LEVEL >= 2 printf("Entering nmfDriver\n"); #endif #ifdef ERROR_CHECKING errno = 0; //no error occured so far #endif options_t default_opts; //Explicit options or Defaultoptions (if opts = NULL) if (!opts) { set_default_opts(&default_opts); opts = &default_opts; } // checking arguments if (checkArguments(a, k, iter, w0, h0, opts)) { perror("Arguments invalid in nmfDriver"); return; } // declaring basic variables to load matrices int m, n, m_w0, n_w0, m_h0, n_h0; double * A, *W0, *H0; W0 = NULL; H0 = NULL; loadMatrix(a, &m, &n, &A); if (errno) { perror("Loading matrix A failed in nmfDriver"); free(A); return; } if (k > m || k > n) { errno = EDOM; perror("Approximation factor k bigger than original matrix dimensions."); free(A); return; } if (w0 && h0) { loadMatrix(w0, &m_w0, &n_w0, &W0); loadMatrix(h0, &m_h0, &n_h0, &H0); } else { generateMatrix(m, n, k, opts->init, opts->min_init, opts->max_init, &W0, &H0, A, opts); } #ifdef ERROR_CHECKING if(errno) { perror("Loading matrix W0 failed in nmfDriver"); perror("Loading matrix H0 failed in nmfDriver"); free(H0); free(A); free(W0); return; } #endif #ifdef PROFILE_MATLAB_COMPARISON struct timeval start_matlab, end_matlab; gettimeofday(&start_matlab, 0); #endif if (checkMatrices(A, W0, H0, m, n, k)) { errno = EDOM; perror("Matrices not compatible in nmfDriver"); free(A); free(W0); free(H0); return; } //memory for saving matrices w and h with the best norm double * wbest = (double*) malloc(sizeof(double)*m*k); double * hbest = (double*) malloc(sizeof(double)*k*n); //memory for normalizing final matrices wbest and hbest double * hlen = (double*) malloc(sizeof(double)*k); idx_double * wlen = (idx_double*) malloc(sizeof(idx_double)*k); //stores the indices of column-sums as well for resorting wbest/hbest #ifdef ERROR_CHECKING if(errno) { perror("Failed allocating memory in nmfDriver"); free(A); free(W0); free(H0); free(wbest); free(hbest); free(hlen); free(wlen); return; } #endif double norm = 0.0; norm = HUGE_VAL; double normbest = 0.0; normbest = HUGE_VAL; int repetitions; for (repetitions = 1; repetitions <= opts->rep; ++repetitions) { switch (alg) { case mu: norm = nmf_mu(A, &W0, &H0, m, n, k, &iter, opts->TolX, opts->TolFun); break; case als: norm = nmf_als(&A, &W0, &H0, m, n, k, &iter, opts->TolX, opts->TolFun); break; case neals: norm = nmf_neals(&A, &W0, &H0, m, n, k, &iter, opts->TolX, opts->TolFun); break; case alspg: norm = nmf_alspg(&A, &W0, &H0, m, n, k, &iter, opts->TolX); break; case pg: norm = nmf_pg(&A, &W0, &H0, m, n, k, &iter, opts->TolX); break; } //storing best matrices w and h so far, if the norm is the best so far if (norm < normbest) { normbest = norm; swap(&wbest, &W0); swap(&hbest, &H0); } //re-initialise the starting matrices w0 and h0, if there are more repetitions to be run if (repetitions < opts->rep) { generateMatrix(m, n, k, ran, opts->min_init, opts->max_init, &W0, &H0, A, opts); } } //end of loop from 1 to rep #if DEBUG_LEVEL >= 0 //Output final norm printf("Final Norm: %.16f\n", normbest); #endif //normalizing results //------------------- double temp; temp = 0.; int i, j; //calculating hlen for (i = 0; i<k; ++i) { temp = 0.; for (j = 0; j<n; ++j) { temp += pow(hbest[i + j*k], 2); } temp = sqrt(temp); if (temp == 0.) { hlen[i] = 1.; fprintf(stderr, "Warning: Matrix H doesn't have full rank\n"); } else hlen[i] = temp; } //wbest = wbest .* hlen' for (i=0; i<m; ++i) { for (j=0; j<k; ++j) { wbest[i + j*m] *= hlen[j]; } } //hbest = hbest ./ hlen for (j=0; j<n; ++j) for (i=0; i<k; ++i) hbest[i + j*k] /= hlen[i]; //Calculating wlen for sorting columns of w and rows of h for(j = 0; j<k; ++j) { temp = 0; for(i=0; i<m; ++i) { temp += wbest[i+j*m] * wbest[i+j*m]; } wlen[j].val = temp; wlen[j].idx = j; } //sort wlen in descending order int elementsize = 0; elementsize = sizeof(idx_double); qsort(wlen, k, elementsize, cmpidx_double); //resorting columns of wbest according to order in wlen for(j=0; j<k; ++j) { for(i=0; i<m; ++i) { W0[i + j*m] = wbest[i + wlen[j].idx*m]; } } //resorting rows of hbest according to order in wlen for(j=0; j<n; ++j) { for(i=0; i<k; ++i) { H0[i + j*k] = hbest[wlen[i].idx + j*k]; } } #ifdef PROFILE_MATLAB_COMPARISON gettimeofday(&end_matlab, 0); outputTiming("Timing of MATLAB_COMPARISON:", start_matlab, end_matlab); #endif //storing final results in files if (opts->w_out) storeMatrix(opts->w_out, m, k, W0); if (opts->h_out) storeMatrix(opts->h_out, k, n, H0); //freeing memory of dynamic variables free(A); free(W0); free(H0); free(wbest); free(hbest); free(hlen); free(wlen); #if DEBUG_LEVEL >= 2 printf("Exiting nmfDriver\n"); #endif #ifdef PROFILE_NMF_DRIVER gettimeofday(&end, 0); outputTiming("Timing:", start, end); #endif }
double nmf_neals(double * a, double * w0, double * h0, int * pm, int * pn, \ int * pk, int * maxiter, const double * pTolX, const double * pTolFun) { // code added to be able to call from R int m = * pm; int n = * pn; int k = * pk; const double TolX = * pTolX; const double TolFun = * pTolFun; // also: changed w0, h0 to simple pointer (instead of double) // // end code added #ifdef PROFILE_NMF_NEALS struct timeval start, end; gettimeofday(&start, 0); #endif #if DEBUG_LEVEL >= 2 printf("Entering nmf_neals\n"); #endif #ifdef ERROR_CHECKING errno = 0; #endif double * help1 = (double*) malloc(sizeof(double)*k*k); double * help2 = (double*) malloc(sizeof(double)*k*n); double * help3 = (double*) malloc(sizeof(double)*k*m); //----------------------------------------- // definition of necessary dynamic data structures //...for calculating matrix h double* h = (double*) malloc(sizeof(double)*k*n); int* jpvt_h = (int*) malloc(sizeof(int)*k); int info; //...for calculating matrix w double* w = (double*) malloc(sizeof(double)*m*k); //---------------- //...for calculating the norm of A-W*H double* d = (double*) malloc(sizeof(double)*m*n); //d = a - w*h double dnorm0 = 0; double dnorm = 0; const double eps = dlamch('E'); //machine precision epsilon const double sqrteps = sqrt(eps); //squareroot of epsilon //------------------- #ifdef ERROR_CHECKING if (errno) { perror("Error allocating memory in nmf_neals"); free(help1); free(help2); free(help3); free(h); free(jpvt_h); free(w); free(d); return -1; } #endif // declaration of data structures for switch to als algorithm // ---------------------------------------------------------- int als_data_allocated = 0; // indicates wheter data structures were already allocated // factor matrices for factorizing matrix w double * q; double * r; // factor matrices for factorizing matrix h double * q_h; double * r_h; double* tau_h; //stores elementary reflectors of factor matrix Q double* work_w; //work array for factorization of matrix w int lwork_w; double* work_h; //work array for factorization of matrix h int lwork_h; double * work_qta; //work array for dorgqr int lwork_qta; double * work_qth; //work array for dorgqr int lwork_qth; //query for optimal workspace size for routine dgeqp3... double querysize; //Loop-Indices int iter, i; //variable for storing if fallback happened in current iteration int fallback; // factorisation step in a loop from 1 to maxiter for (iter = 1; iter <= *maxiter; ++iter) { //no fallback in this iteration so far fallback = 0; // calculating matrix h //---------------- //help1 = w0'*w0 dgemm('T', 'N', k, k, m, 1.0, w0, m, w0, m, 0., help1, k); //help2 = w0'*a dgemm('T', 'N', k, n, m, 1.0, w0, m, a, m, 0., help2, k); //LU-Factorisation of help1 to solve equation help1 * x = help2 dgesv(k, n, help1, k, jpvt_h, help2, k, &info); // if factor matrix U is singular -> switch back to als algorithm to compute h if( info > 0) { //set fallback to 1 to indicate that fallback happened fallback = 1; // do dynamic data structures need to be allocated? if (!als_data_allocated) { als_data_allocated = 1; // factor matrices for factorizing matrix w q = (double*) malloc(sizeof(double)*m*k); r = (double*) malloc(sizeof(double)*m*k); // factor matrices for factorizing matrix h q_h = (double*) malloc(sizeof(double)*n*k); r_h = (double*) malloc(sizeof(double)*n*k); tau_h = (double*) malloc(sizeof(double)*k); //stores elementary reflectors of factor matrix Q //query for optimal workspace size for routine dgeqp3... //for matrix w dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info); lwork_w = (int) querysize; work_w = (double*) malloc(sizeof(double)*lwork_w); //work array for factorization of matrix help1 (dgeqp3) //for matrix h dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info); lwork_h = (int) querysize; work_h = (double*) malloc(sizeof(double)*lwork_h); //work array for factorization of matrix h //query for optimal workspace size for routine dorgqr... //for matrix w dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info); lwork_qta = (int)querysize; work_qta = (double*) malloc(sizeof(double)*lwork_qta); //work array for dorgqr //for matrix h dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info); lwork_qth = (int)querysize; work_qth = (double*) malloc(sizeof(double)*lwork_qth); } // calculating matrix h //---------------- //re-initialization //copy *w0 to q dlacpy('A', m, k, w0, m, q, m); //initialise jpvt_h to 0 -> every column free for (i = 0; i<k; ++i) jpvt_h[i] = 0; // Q-R factorization with column pivoting dgeqp3(m, k, q, m, jpvt_h, tau_h, work_w, lwork_w, &info); //copying upper triangular factor-matrix r out of q into r dlacpy('U', m, k, q, m, r, k); //Begin of least-squares-solution to w0 * x = a //generate explicit matrix q (m times k) and calculate q' * a dorgqr(m, k, k, q, m, tau_h, work_qta, lwork_qta, &info); dgemm('T', 'N', k, n, m, 1.0, q, m, a, m, 0.0, q_h, k); //solve R * x = (Q'*A) dtrtrs('U','N','N',k,n,r,k,q_h,k,&info); //copy matrix q to h, but permutated according to jpvt_h for (i=0; i<k; ++i) { dcopy(n, q_h + i, k, h + jpvt_h[i] - 1, k); } //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint for (i=0; i<k*n; ++i) { if (h[i] < ZERO_THRESHOLD) h[i] = 0.; } } else { //h = max(ZERO_THRESHOLD, help1\help2) for (i=0; i < k*n; ++i) h[i] = (help2[i] > ZERO_THRESHOLD ? help2[i] : 0.); } // calculating matrix w = max(0, help1\help3)' //---------------------------- //help1 = h*h' dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., help1, k); //help3 = h*a' dgemm('N', 'T', k, m, n, 1.0, h, k, a, m, 0., help3, k); //LU-Factorisation of help1 dgesv(k, m, help1, k, jpvt_h, help3, k, &info); // if( info > 0) { // do dynamic data structures need to be allocated? if (!als_data_allocated) { als_data_allocated = 1; // factor matrices for factorizing matrix w q = (double*) malloc(sizeof(double)*m*k); r = (double*) malloc(sizeof(double)*m*k); // factor matrices for factorizing matrix h q_h = (double*) malloc(sizeof(double)*n*k); r_h = (double*) malloc(sizeof(double)*n*k); tau_h = (double*) malloc(sizeof(double)*k); //stores elementary reflectors of factor matrix Q //query for optimal workspace size for routine dgeqp3... //for matrix w dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info); lwork_w = (int) querysize; work_w = (double*) malloc(sizeof(double)*lwork_w); //work array for factorization of matrix help1 (dgeqp3) //..for matrix h dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info); lwork_h = (int) querysize; work_h = (double*) malloc(sizeof(double)*lwork_h); //work array for factorization of matrix h //query for optimal workspace size for routine dorgqr... //for matrix w dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info); lwork_qta = (int)querysize; work_qta = (double*) malloc(sizeof(double)*lwork_qta); //work array for dorgqr // ... for matrix h dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info); lwork_qth = (int)querysize; work_qth = (double*) malloc(sizeof(double)*lwork_qth); } //calculating matrix w //copy original matrix h to q_h, but transposed for (i=0; i<k; ++i) { dcopy(n, h + i, k, q_h + i*n, 1); } //initialise jpvt_a to 0 -> every column free for (i = 0; i<k; ++i) jpvt_h[i] = 0; //Q-R factorization dgeqp3(n, k, q_h, n, jpvt_h, tau_h, work_h, lwork_h, &info); //copying upper triangular factor-matrix r_h out of q into r_h dlacpy('U', n, k, q_h, n, r_h, k); //Begin of least-squares-solution to w0 * x = a //generate explicit matrix q (n times k) and calculate *a = q' * a' dorgqr(n, k, k, q_h, n, tau_h, work_qth, lwork_qth, &info); dgemm('T', 'T', k, m, n, 1.0, q_h, n, a, m, 0.0, q, k); //solve R_h * x = (Q'*A') dtrtrs('U', 'N', 'N', k, m, r_h, k, q, k, &info); //jpvt_h*(R\(Q'*A')) permutation and transposed copy to w for (i=0; i<k; ++i) { dcopy(m, q + i, k, w + m * (jpvt_h[i] - 1), 1); } //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint for (i=0; i<k*m; ++i) { if (w[i] < ZERO_THRESHOLD) w[i] = 0.; } } else { //w = max(0, help3)' for (i=0; i<k; ++i) { dcopy(m, help3 + i, k, w + i*m, 1); } for (i=0; i<m*k; ++i) { if (w[i] < ZERO_THRESHOLD) w[i] = 0.; } } // calculating the norm of D = A-W*H dnorm = calculateNorm(a, w, h, d, m, n, k); // calculating change in w -> dw //---------------------------------- double dw; dw = calculateMaxchange(w, w0, m, k, sqrteps); // calculating change in h -> dh //----------------------------------- double dh; dh = calculateMaxchange(h, h0, k, n, sqrteps); //Max-Change = max(dh, dw) = delta double delta; delta = (dh > dw) ? dh : dw; // storing the matrix results of the current iteration swap(&w0, &w); swap(&h0, &h); // storing the norm results of the current iteration dnorm0 = dnorm; #if DEBUG_LEVEL >= 1 printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta); #endif //Check for Convergence if (iter > 1) { if (delta < TolX) { *maxiter = iter; break; } else if (dnorm <= TolFun*dnorm0) { *maxiter = iter; break; } } } //end of loop from 1 to maxiter #if DEBUG_LEVEL >= 2 printf("Exiting nmf_neals\n"); #endif #ifdef PROFILE_NMF_NEALS gettimeofday(&end, 0); outputTiming("", start, end); #endif // freeing memory if used free(help1); free(help2); free(help3); free(h); free(jpvt_h); free(w); free(d); if(als_data_allocated) { free(q); free(r); free(q_h); free(r_h); free(work_h); free(work_w); free(tau_h); free(work_qta); free(work_qth); } // returning calculated norm return dnorm; }
double nmf_mu(double * a, double ** w0, double ** h0, int m, int n, \ int k, int * maxiter, const double TolX, const double TolFun) { #ifdef PROFILE_NMF_MU struct timeval start, end; gettimeofday(&start, 0); #endif #if DEBUG_LEVEL >= 2 printf("Entering nmf_mu\n"); #endif #ifdef ERROR_CHECKING errno = 0; #endif // definition of necessary dynamic data structures //...for calculating matrix h double* numerh = (double*) malloc(sizeof(double) *k*n); double* work1 = (double*) malloc(sizeof(double)*k*k); // used for calculation of h & w double* work2 = (double*) malloc(sizeof(double)*k*n); double* h = (double*) malloc(sizeof(double)*k*n); //---------------- //...for calculating matrix w double* numerw = (double*) malloc(sizeof(double)*m*k); double* work2w = (double*) malloc(sizeof(double)*m*k); double* w = (double*) malloc(sizeof(double)*m*k); //----------------- //...for calculating the norm of A-W*H double* d = (double*) malloc(sizeof(double)*m*n); //d = a - w*h double dnorm0 = 0.; double dnorm = 0.; const double eps = dlamch('E'); //machine precision epsilon const double sqrteps = sqrt(eps); //squareroot of epsilon //------------------- #ifdef ERROR_CHECKING if (errno) { perror("Error allocating memory in nmf_mu"); free(numerh); free(numerw); free(work1); free(work2); free(h); free(w); free(work2w); free(d); return -1; } #endif //Is ZERO_THRESHOLD _not_ defined then use machine epsilon as default #ifndef ZERO_THRESHOLD const double ZERO_THRESHOLD = eps; #endif //Loop-Indices int iter, i; // factorisation step in a loop from 1 to maxiter for (iter = 1; iter <= *maxiter; ++iter) { // calculating matrix h //---------------- // calculating numerh = w0'*a dgemm('T', 'N', k, n, m, 1.0, *w0, m, a, m, 0., numerh, k); // calculating first intermediate result work1 = w0'*w0 dgemm('T', 'N', k, k, m, 1.0, *w0, m, *w0, m, 0., work1, k); // calculating second intermediate result work2 = work1 * h0 dgemm('N', 'N', k, n, k, 1.0, work1, k, *h0, k, 0., work2, k); //calculating elementwise matrixmultiplication, Division and addition h = h0 .* (numerh ./(work2 + eps)) //set elements < zero_threshold to zero double tmp_element; for(i = 0; i< k*n; ++i) { if ( (*h0)[i] == 0. || numerh[i] == 0.) h[i] = 0.; else { tmp_element = (*h0)[i] * (numerh[i] / (work2[i] + DIV_BY_ZERO_AVOIDANCE)); h[i] = (tmp_element < ZERO_THRESHOLD) ? 0. : tmp_element; } } // calculating matrix w //---------------------------- // calculating numerw = a*h' dgemm('N', 'T', m, k, n, 1.0, a, m, h, k, 0., numerw, m); // calculating first intermediate result work1 = h*h' (kxk-Matrix) => re-use of work1 dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., work1, k); // calculating second intermediate result work2w = w0 * work1 dgemm('N', 'N', m, k, k, 1.0, *w0, m, work1, k, 0., work2w, m); //calculating elementwise matrixmultiplication, Division and addition w = w0 .* (numerw ./ (work2w + eps)) //set elements < zero_threshold to zero for(i = 0; i < m*k; ++i) { if ( (*w0)[i] == 0. || numerw[i] == 0.) w[i] = 0.; else { tmp_element = (*w0)[i] * (numerw[i] / (work2w[i] + DIV_BY_ZERO_AVOIDANCE)); w[i] = (tmp_element < ZERO_THRESHOLD) ? 0. : tmp_element; } } // calculating the norm of D = A-W*H dnorm = calculateNorm(a, w, h, d, m, n, k); // calculating change in w -> dw //---------------------------------- double dw = calculateMaxchange(w, *w0, m, k, sqrteps); // calculating change in h -> dh //----------------------------------- double dh = calculateMaxchange(h, *h0, k, n, sqrteps); //Max-Change = max(dh, dw) = delta double delta = 0.0; delta = (dh > dw) ? dh : dw; // storing the matrix results of the current iteration in W0 respectively H0 swap(w0, &w); swap(h0, &h); //Check for Convergence if (iter > 1) { if (delta < TolX) { *maxiter = iter; break; } else if (dnorm <= TolFun*dnorm0) { *maxiter = iter; break; } } // storing the norm result of the current iteration dnorm0 = dnorm; #if DEBUG_LEVEL >= 1 printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta); #endif } //end of loop from 1 to maxiter #if DEBUG_LEVEL >= 2 printf("Exiting nmf_mu\n"); #endif #ifdef PROFILE_NMF_MU gettimeofday(&end, 0); outputTiming("", start, end); #endif // freeing memory if used free(numerh); free(numerw); free(work1); free(work2); free(h); free(w); free(work2w); free(d); // returning calculated norm return dnorm; }