C++ (Cpp) outputTiming примеры использования

Язык программирования: C++ (Cpp)

Метод/Функция: outputTiming

Примеров на hotexamples.com: 4

C++ (Cpp) outputTiming - 4 примера найдено. Это лучшие примеры C++ (Cpp) кода для outputTiming, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: storematrix.c Проект: Tianzhang0809/collaborative.filtering.exp

void storeMatrix(const char *fileName, const int m, const int n, const double *matrix)
{

#ifdef PROFILE_STORE_MATRIX
	struct timeval start, end;
	gettimeofday(&start, 0);
#endif
#if DEBUG_LEVEL >= 2
	printf("Entering storeMatrix\n");
#endif
#ifdef ERROR_CHECKING
	errno = 0;		//no error occured so far
#endif

	int i, j;
	FILE *file = fopen(fileName, "w");

#ifdef ERROR_CHECKING
	if (errno) {
	  perror(fileName);
	  return;
	}
#endif

	fprintf(file, "%d\n", m);
	fprintf(file, "%d\n", n);

	for(i = 0; i < m; i++)
	{
		for(j = 0; j < n; j++)
		{
			fprintf(file, "%.15e ", matrix[i + j * m]);
		}
		fprintf(file, "\n");
	}
	fclose(file);

#if DEBUG_LEVEL >= 2
	printf("Exiting storeMatrix\n");
#endif
#ifdef PROFILE_STORE_MATRIX
	gettimeofday(&end, 0);
	outputTiming("Timing:", start, end);
#endif

}

Пример #2

Показать файл

Файл: nmfdriver.c Проект: Tianzhang0809/collaborative.filtering.exp

void nmfDriver(const char* a, const int k, int iter, const char* w0, const char* h0, alg_t alg, options_t * opts) {


#ifdef PROFILE_NMF_DRIVER
	struct timeval start, end;
	gettimeofday(&start, 0);
#endif
#if DEBUG_LEVEL >= 2
	printf("Entering nmfDriver\n");
#endif

#ifdef ERROR_CHECKING
  errno = 0;		//no error occured so far
#endif

  options_t default_opts;

  //Explicit options or Defaultoptions (if opts = NULL)
  if (!opts) {
    set_default_opts(&default_opts);
    opts = &default_opts;
  }

  // checking arguments
  if (checkArguments(a, k, iter, w0, h0, opts)) {
    perror("Arguments invalid in nmfDriver");
    return;
  }
  

  // declaring basic variables to load matrices
  int m, n, m_w0, n_w0, m_h0, n_h0;
  double * A, *W0, *H0;
  W0 = NULL;
  H0 = NULL;

  loadMatrix(a, &m, &n, &A);
  if (errno) {
    perror("Loading matrix A failed in nmfDriver");
    free(A);
    return;
  }
  if (k > m || k > n) {
    errno = EDOM;
    perror("Approximation factor k bigger than original matrix dimensions.");
    free(A);
    return;
  }
  if (w0 && h0) {
    loadMatrix(w0, &m_w0, &n_w0, &W0);
    loadMatrix(h0, &m_h0, &n_h0, &H0);
  }  
  else {
    generateMatrix(m, n, k, opts->init, opts->min_init, opts->max_init, &W0, &H0, A, opts);
  }  

#ifdef ERROR_CHECKING
  if(errno) {
    perror("Loading matrix W0 failed in nmfDriver");
    perror("Loading matrix H0 failed in nmfDriver");
    free(H0);
    free(A);
    free(W0);
    return;
  }
#endif


#ifdef PROFILE_MATLAB_COMPARISON
	struct timeval start_matlab, end_matlab;
	gettimeofday(&start_matlab, 0);
#endif


  if (checkMatrices(A, W0, H0, m, n, k)) {
    errno = EDOM;
    perror("Matrices not compatible in nmfDriver");
    free(A);
    free(W0);
    free(H0);
    return;
  }
  
  
  //memory for saving matrices w and h with the best norm
  double * wbest = (double*) malloc(sizeof(double)*m*k);
  double * hbest = (double*) malloc(sizeof(double)*k*n);
  //memory for normalizing final matrices wbest and hbest
  double * hlen = (double*) malloc(sizeof(double)*k);
  idx_double * wlen = (idx_double*) malloc(sizeof(idx_double)*k);	//stores the indices of column-sums as well for resorting wbest/hbest
  

#ifdef ERROR_CHECKING
  if(errno) {
    perror("Failed allocating memory in nmfDriver");
    free(A);
    free(W0);
    free(H0);
    free(wbest);
    free(hbest);
    free(hlen);
    free(wlen);
    return;
  }
#endif

  double norm = 0.0;
  norm = HUGE_VAL;
  double normbest = 0.0;
  normbest = HUGE_VAL;

  int repetitions;

  for (repetitions = 1; repetitions <= opts->rep; ++repetitions) {
    switch (alg) {
      case mu:	norm = nmf_mu(A, &W0, &H0, m, n, k, &iter, opts->TolX, opts->TolFun);
		break;
      case als:	norm = nmf_als(&A, &W0, &H0, m, n, k, &iter, opts->TolX, opts->TolFun);
		break;
      case neals: norm = nmf_neals(&A, &W0, &H0, m, n, k, &iter, opts->TolX, opts->TolFun);
		break;
      case alspg:	norm = nmf_alspg(&A, &W0, &H0, m, n, k, &iter, opts->TolX);
		break;
      case pg:	norm = nmf_pg(&A, &W0, &H0, m, n, k, &iter, opts->TolX);
		break;
    }
    //storing best matrices w and h so far, if the norm is the best so far
    if (norm < normbest) {
      normbest = norm;
      swap(&wbest, &W0);
      swap(&hbest, &H0);
    }
    //re-initialise the starting matrices w0 and h0, if there are more repetitions to be run
    if (repetitions < opts->rep) {
      generateMatrix(m, n, k, ran, opts->min_init, opts->max_init, &W0, &H0, A, opts);
    }
  } //end of loop from 1 to rep


#if DEBUG_LEVEL >= 0
  //Output final norm
  printf("Final Norm: %.16f\n", normbest);
#endif
 

  //normalizing results
  //-------------------
  double temp;
  temp = 0.;
  int i, j;

  //calculating hlen
  for (i = 0; i<k; ++i) {
    temp = 0.;
    for (j = 0; j<n; ++j) {
      temp += pow(hbest[i + j*k], 2);
    }
    temp = sqrt(temp);
    
    if (temp == 0.) {
      hlen[i] = 1.;
      fprintf(stderr, "Warning: Matrix H doesn't have full rank\n");
    }
    else
      hlen[i] = temp;
    
  }
  
  //wbest = wbest .* hlen'
  for (i=0; i<m; ++i) {
    for (j=0; j<k; ++j) {
      wbest[i + j*m] *= hlen[j];
    }
  }
  //hbest = hbest ./ hlen

  for (j=0; j<n; ++j)
    for (i=0; i<k; ++i)
      hbest[i + j*k] /= hlen[i];

  //Calculating wlen for sorting columns of w and rows of h
  for(j = 0; j<k; ++j) {
    temp = 0;
    for(i=0; i<m; ++i) {
      temp += wbest[i+j*m] * wbest[i+j*m];
    }
    wlen[j].val = temp;
    wlen[j].idx = j;
  }
  
  //sort wlen in descending order
  int elementsize = 0;
  elementsize = sizeof(idx_double);
  qsort(wlen, k, elementsize, cmpidx_double);
  
  
  //resorting columns of wbest according to order in wlen
  for(j=0; j<k; ++j) {
    for(i=0; i<m; ++i) {
      W0[i + j*m] = wbest[i + wlen[j].idx*m];
    }
  }
  
  //resorting rows of hbest according to order in wlen
  for(j=0; j<n; ++j) {
    for(i=0; i<k; ++i) {
      H0[i + j*k] = hbest[wlen[i].idx + j*k]; 
    }
  }



#ifdef PROFILE_MATLAB_COMPARISON
	gettimeofday(&end_matlab, 0);
	outputTiming("Timing of MATLAB_COMPARISON:", start_matlab, end_matlab);
#endif  
  

  //storing final results in files
  if (opts->w_out)
    storeMatrix(opts->w_out, m, k, W0);
  if (opts->h_out)
    storeMatrix(opts->h_out, k, n, H0);


  //freeing memory of dynamic variables
  free(A);
  free(W0);
  free(H0);
  free(wbest);
  free(hbest);
  free(hlen);
  free(wlen);

#if DEBUG_LEVEL >= 2
	printf("Exiting nmfDriver\n");
#endif
#ifdef PROFILE_NMF_DRIVER
	gettimeofday(&end, 0);
	outputTiming("Timing:", start, end);
#endif

}

Пример #3

Показать файл

Файл: nmf_neals.c Проект: mschubert/NMFconsensus

double nmf_neals(double * a, double * w0, double * h0, int * pm, int * pn, \
		      int * pk, int * maxiter, const double * pTolX, const double * pTolFun) 
{

// code added to be able to call from R
int m = * pm;
int n = * pn;
int k = * pk;
const double TolX = * pTolX;
const double TolFun = * pTolFun;
// also: changed w0, h0 to simple pointer (instead of double)
// // end code added

#ifdef PROFILE_NMF_NEALS
	struct timeval start, end;
	gettimeofday(&start, 0);
#endif
#if DEBUG_LEVEL >= 2
	printf("Entering nmf_neals\n");
#endif


#ifdef ERROR_CHECKING
  errno = 0;
#endif


  double * help1 = (double*) malloc(sizeof(double)*k*k);
  double * help2 = (double*) malloc(sizeof(double)*k*n);
  double * help3 = (double*) malloc(sizeof(double)*k*m);

  //-----------------------------------------



  // definition of necessary dynamic data structures
  //...for calculating matrix h
  double* h = (double*) malloc(sizeof(double)*k*n);
  int* jpvt_h = (int*) malloc(sizeof(int)*k);
  int info;
  //...for calculating matrix w
   double* w = (double*) malloc(sizeof(double)*m*k);
  //----------------


  //...for calculating the norm of A-W*H
  double* d = (double*) malloc(sizeof(double)*m*n);					//d = a - w*h
  double dnorm0 = 0;
  double dnorm = 0;
  const double eps = dlamch('E');					//machine precision epsilon
  const double sqrteps = sqrt(eps);					//squareroot of epsilon
  

  //-------------------

#ifdef ERROR_CHECKING
  if (errno) {
    perror("Error allocating memory in nmf_neals");
    free(help1);
    free(help2);
    free(help3);
    free(h);
    free(jpvt_h);
    free(w);
    free(d);
    return -1;
  }
#endif


  // declaration of data structures for switch to als algorithm
  // ----------------------------------------------------------

  int als_data_allocated = 0;					// indicates wheter data structures were already allocated
  // factor matrices for factorizing matrix w
  double * q;
  double * r;
  // factor matrices for factorizing matrix h
  double * q_h;
  double * r_h;

  double* tau_h;                   //stores elementary reflectors of factor matrix Q
  double* work_w;            //work array for factorization of matrix w
  int lwork_w;
  double* work_h;            //work array for factorization of matrix h  
  int lwork_h;
  double * work_qta;	     //work array for dorgqr
  int lwork_qta;
  double * work_qth;	     //work array for dorgqr
  int lwork_qth;

  //query for optimal workspace size for routine dgeqp3...
  double querysize;
  


  

  
  //Loop-Indices
  int iter, i;

  //variable for storing if fallback happened in current iteration
  int fallback;

  // factorisation step in a loop from 1 to maxiter
  for (iter = 1; iter <= *maxiter; ++iter) {

    //no fallback in this iteration so far
    fallback = 0;

    // calculating matrix h
    //----------------
    //help1 = w0'*w0
    dgemm('T', 'N', k, k, m, 1.0, w0, m, w0, m, 0., help1, k);
    //help2 = w0'*a
    dgemm('T', 'N', k, n, m, 1.0, w0, m, a, m, 0., help2, k);
    //LU-Factorisation of help1 to solve equation help1 * x = help2
    dgesv(k, n, help1, k, jpvt_h, help2, k, &info);
    // if factor matrix U is singular -> switch back to als algorithm to compute h
    if( info > 0) {

	//set fallback to 1 to  indicate that fallback happened
	fallback = 1;

	// do dynamic data structures need to be allocated?
	if (!als_data_allocated) {
	  als_data_allocated = 1;

  	  // factor matrices for factorizing matrix w
	  q = (double*) malloc(sizeof(double)*m*k);
	  r = (double*) malloc(sizeof(double)*m*k);
	  // factor matrices for factorizing matrix h
	  q_h = (double*) malloc(sizeof(double)*n*k);
	  r_h = (double*) malloc(sizeof(double)*n*k);

	  tau_h = (double*) malloc(sizeof(double)*k);                   //stores elementary reflectors of factor matrix Q


          //query for optimal workspace size for routine dgeqp3...
	  //for matrix w
	  dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info);
	  lwork_w = (int) querysize;
	  work_w = (double*) malloc(sizeof(double)*lwork_w);            //work array for factorization of matrix help1 (dgeqp3)
	  //for matrix h
	  dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info);
	  lwork_h = (int) querysize;
	  work_h = (double*) malloc(sizeof(double)*lwork_h);            //work array for factorization of matrix h


	  //query for optimal workspace size for routine dorgqr...
	  //for matrix w
	  dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info);
	  lwork_qta = (int)querysize;
	  work_qta = (double*) malloc(sizeof(double)*lwork_qta);	  //work array for dorgqr
	  //for matrix h
	  dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info);
	  lwork_qth = (int)querysize;
	  work_qth = (double*) malloc(sizeof(double)*lwork_qth);

	}


        // calculating matrix h
        //----------------
        //re-initialization

        //copy *w0 to q
        dlacpy('A', m, k, w0, m, q, m);


        //initialise jpvt_h to 0 -> every column free
        for (i = 0; i<k; ++i)
          jpvt_h[i] = 0;

        // Q-R factorization with column pivoting
        dgeqp3(m, k, q, m, jpvt_h, tau_h, work_w, lwork_w, &info);


        //copying upper triangular factor-matrix r out of q into r
        dlacpy('U', m, k, q, m, r, k);


        //Begin of least-squares-solution to w0 * x = a

        //generate explicit matrix q (m times k) and calculate q' * a
        dorgqr(m, k, k, q, m, tau_h, work_qta, lwork_qta, &info);
        dgemm('T', 'N', k, n, m, 1.0, q, m, a, m, 0.0, q_h, k);


        //solve R * x = (Q'*A)
        dtrtrs('U','N','N',k,n,r,k,q_h,k,&info);

        //copy matrix q to h, but permutated according to jpvt_h
        for (i=0; i<k; ++i) {
          dcopy(n, q_h + i, k, h + jpvt_h[i] - 1, k);
        }

        //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint
        for (i=0; i<k*n; ++i) {
        if (h[i] < ZERO_THRESHOLD)
          h[i] = 0.;
        }


    }
    else {
      //h = max(ZERO_THRESHOLD, help1\help2)
      for (i=0; i < k*n; ++i)
        h[i] = (help2[i] > ZERO_THRESHOLD ? help2[i] : 0.);
    }


	    // calculating matrix w = max(0, help1\help3)'
	    //----------------------------
	    //help1 = h*h'
	    dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., help1, k);
	    //help3 = h*a'
	    dgemm('N', 'T', k, m, n, 1.0, h, k, a, m, 0., help3, k);
	    //LU-Factorisation of help1
	    dgesv(k, m, help1, k, jpvt_h, help3, k, &info);
	    //
	    if( info > 0) {
		// do dynamic data structures need to be allocated?
	        if (!als_data_allocated) {
	          als_data_allocated = 1;

	          // factor matrices for factorizing matrix w
	          q = (double*) malloc(sizeof(double)*m*k);
	          r = (double*) malloc(sizeof(double)*m*k);
	          // factor matrices for factorizing matrix h
	          q_h = (double*) malloc(sizeof(double)*n*k);
	          r_h = (double*) malloc(sizeof(double)*n*k);
	
	          tau_h = (double*) malloc(sizeof(double)*k);                   //stores elementary reflectors of factor matrix Q
	
	
	          //query for optimal workspace size for routine dgeqp3...
		
		  //for matrix w
        	  dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info);
	          lwork_w = (int) querysize;
        	  work_w = (double*) malloc(sizeof(double)*lwork_w);            //work array for factorization of matrix help1 (dgeqp3)
	
		  //..for matrix h
		  dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info);
		  lwork_h = (int) querysize;
		  work_h = (double*) malloc(sizeof(double)*lwork_h);            //work array for factorization of matrix h
	
	
        	  //query for optimal workspace size for routine dorgqr...
		  //for matrix w
	          dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info);
	          lwork_qta = (int)querysize;
	          work_qta = (double*) malloc(sizeof(double)*lwork_qta);          //work array for dorgqr
		  // ... for matrix h
		  dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info);
		  lwork_qth = (int)querysize;
		  work_qth = (double*) malloc(sizeof(double)*lwork_qth);
	  
	        } 

	
		//calculating matrix w
	        //copy original matrix h to q_h, but transposed
	        for (i=0; i<k; ++i) {
	          dcopy(n, h + i, k, q_h + i*n, 1);
	        }


        	//initialise jpvt_a to 0 -> every column free
	        for (i = 0; i<k; ++i)
	          jpvt_h[i] = 0;
	
        	//Q-R factorization
	        dgeqp3(n, k, q_h, n, jpvt_h, tau_h, work_h, lwork_h, &info);


	        //copying upper triangular factor-matrix r_h out of q into r_h
	        dlacpy('U', n, k, q_h, n, r_h, k);
	

	        //Begin of least-squares-solution to w0 * x = a
 
	        //generate explicit matrix q (n times k) and calculate *a = q' * a'
	        dorgqr(n, k, k, q_h, n, tau_h, work_qth, lwork_qth, &info);
	        dgemm('T', 'T', k, m, n, 1.0, q_h, n, a, m, 0.0, q, k);




	        //solve R_h * x = (Q'*A')
	        dtrtrs('U', 'N', 'N', k, m, r_h, k, q, k, &info);


        	//jpvt_h*(R\(Q'*A')) permutation and transposed copy to w
	        for (i=0; i<k; ++i) {
	          dcopy(m, q + i, k, w + m * (jpvt_h[i] - 1), 1);
        	}

	        //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint
        	for (i=0; i<k*m; ++i) {
	          if (w[i] < ZERO_THRESHOLD)
	        	    w[i] = 0.;
        	}


	    }

	    else {
	        //w = max(0, help3)'
	        for (i=0; i<k; ++i) {
	          dcopy(m, help3 + i, k, w + i*m, 1);
	        }
	        for (i=0; i<m*k; ++i) {
	          if (w[i] < ZERO_THRESHOLD)
	            w[i] = 0.;
	        }
	    }




    
    // calculating the norm of D = A-W*H
    dnorm = calculateNorm(a, w, h, d, m, n, k);

    
    // calculating change in w -> dw
    //----------------------------------
    double dw;
    dw = calculateMaxchange(w, w0, m, k, sqrteps);

    
    // calculating change in h -> dh
    //-----------------------------------
    double dh;
    dh = calculateMaxchange(h, h0, k, n, sqrteps);

    //Max-Change = max(dh, dw) = delta
    double delta;
    delta = (dh > dw) ? dh : dw;

   

    // storing the matrix results of the current iteration
    swap(&w0, &w);
    swap(&h0, &h);



    // storing the norm results of the current iteration
    dnorm0 = dnorm;


#if DEBUG_LEVEL >= 1
  printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta);
#endif   


    //Check for Convergence
    if (iter > 1) {
      if (delta < TolX) {
        *maxiter = iter;
        break;
      }
      else
        if (dnorm <= TolFun*dnorm0) {
        *maxiter = iter;
        break;
        }


    }
   
  } //end of loop from 1 to maxiter

#if DEBUG_LEVEL >= 2
	printf("Exiting nmf_neals\n");
#endif
#ifdef PROFILE_NMF_NEALS
	gettimeofday(&end, 0);
	outputTiming("", start, end);
#endif

  // freeing memory if used
    free(help1);
    free(help2);
    free(help3);
    free(h);
    free(jpvt_h);
    free(w);
    free(d);

    if(als_data_allocated) {
      free(q);
      free(r);
      free(q_h);
      free(r_h);
      free(work_h);
      free(work_w);
      free(tau_h);
      free(work_qta);
      free(work_qth);
    }

  // returning calculated norm
  return dnorm;
}

Пример #4

Показать файл

Файл: nmf_mu.c Проект: Tianzhang0809/collaborative.filtering.exp

double nmf_mu(double * a, double ** w0, double ** h0, int m, int n, \
		      int k, int * maxiter, const double TolX, const double TolFun) 
{
  

#ifdef PROFILE_NMF_MU
        struct timeval start, end;
        gettimeofday(&start, 0);
#endif

#if DEBUG_LEVEL >= 2
	printf("Entering nmf_mu\n");
#endif


#ifdef ERROR_CHECKING
  errno = 0;
#endif

  // definition of necessary dynamic data structures
  //...for calculating matrix h
  double* numerh = (double*) malloc(sizeof(double) *k*n);
  double* work1 = (double*) malloc(sizeof(double)*k*k);					// used for calculation of h & w
  double* work2 = (double*) malloc(sizeof(double)*k*n);
  double* h = (double*) malloc(sizeof(double)*k*n);
  //----------------

  //...for calculating matrix w
  double* numerw = (double*) malloc(sizeof(double)*m*k);
  double* work2w = (double*) malloc(sizeof(double)*m*k);
  double* w = (double*) malloc(sizeof(double)*m*k);
  //-----------------

  //...for calculating the norm of A-W*H
  double* d = (double*) malloc(sizeof(double)*m*n);					//d = a - w*h
  double dnorm0 = 0.;
  double dnorm = 0.;
  const double eps = dlamch('E');					//machine precision epsilon
  const double sqrteps = sqrt(eps);					//squareroot of epsilon
  


  //-------------------

#ifdef ERROR_CHECKING
  if (errno) {
    perror("Error allocating memory in nmf_mu");
    free(numerh);
    free(numerw);
    free(work1);
    free(work2);
    free(h);
    free(w);
    free(work2w);
    free(d);
    return -1;
  }
#endif



//Is ZERO_THRESHOLD _not_ defined then use machine epsilon as default
#ifndef ZERO_THRESHOLD
  const double ZERO_THRESHOLD = eps;
#endif

  
  //Loop-Indices
  int iter, i;



  // factorisation step in a loop from 1 to maxiter
  for (iter = 1; iter <= *maxiter; ++iter) {



    // calculating matrix h
    //----------------
    // calculating numerh = w0'*a
    dgemm('T', 'N', k, n, m, 1.0, *w0, m, a, m, 0., numerh, k);  
    // calculating first intermediate result work1 = w0'*w0
    dgemm('T', 'N', k, k, m, 1.0, *w0, m, *w0, m, 0., work1, k);
    // calculating second intermediate result work2 = work1 * h0
    dgemm('N', 'N', k, n, k, 1.0, work1, k, *h0, k, 0., work2, k);


    //calculating elementwise matrixmultiplication, Division and addition h = h0 .* (numerh ./(work2 + eps))
    //set elements < zero_threshold to zero
    double tmp_element;
    for(i = 0; i< k*n; ++i) {
      if ( (*h0)[i] == 0. || numerh[i]  == 0.)
	h[i] = 0.;
      else {
	tmp_element = (*h0)[i] * (numerh[i] / (work2[i] + DIV_BY_ZERO_AVOIDANCE));
	h[i] = (tmp_element < ZERO_THRESHOLD) ? 0. : tmp_element;
      }
    }
    


    // calculating matrix w
    //----------------------------
    // calculating numerw = a*h'
    dgemm('N', 'T', m, k, n, 1.0, a, m, h, k, 0., numerw, m);
    // calculating first intermediate result work1 = h*h' (kxk-Matrix) => re-use of work1
    dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., work1, k);
    // calculating second intermediate result work2w = w0 * work1
    dgemm('N', 'N', m, k, k, 1.0, *w0, m, work1, k, 0., work2w, m);




    //calculating elementwise matrixmultiplication, Division and addition w = w0 .* (numerw ./ (work2w + eps))
    //set elements < zero_threshold to zero
    for(i = 0; i < m*k; ++i) {
      if ( (*w0)[i] == 0. || numerw[i] == 0.)
	w[i] = 0.;
      else {
	tmp_element = (*w0)[i] * (numerw[i] / (work2w[i] + DIV_BY_ZERO_AVOIDANCE));
	w[i] = (tmp_element < ZERO_THRESHOLD) ? 0. : tmp_element;
      }
    }  



    
    // calculating the norm of D = A-W*H
    dnorm = calculateNorm(a, w, h, d, m, n, k);

    
    // calculating change in w -> dw
    //----------------------------------
    double dw = calculateMaxchange(w, *w0, m, k, sqrteps);

    
    // calculating change in h -> dh
    //-----------------------------------
    double dh = calculateMaxchange(h, *h0, k, n, sqrteps);

    //Max-Change = max(dh, dw) = delta
    double delta = 0.0;
    delta = (dh > dw) ? dh : dw;


    // storing the matrix results of the current iteration in W0 respectively H0
    swap(w0, &w);
    swap(h0, &h);





    //Check for Convergence
    if (iter > 1) {
      if (delta < TolX) {
	*maxiter = iter;
	break;
      }
      else
	if (dnorm <= TolFun*dnorm0) {
	*maxiter = iter;
	break;
	}


    }

    
    // storing the norm result of the current iteration
    dnorm0 = dnorm;






#if DEBUG_LEVEL >= 1
  printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta);
#endif   
   
  } //end of loop from 1 to maxiter

#if DEBUG_LEVEL >= 2
	printf("Exiting nmf_mu\n");
#endif
#ifdef PROFILE_NMF_MU
	gettimeofday(&end, 0);
	outputTiming("", start, end);
#endif

  // freeing memory if used
  
  free(numerh);
  free(numerw);
  free(work1);
  free(work2);
  free(h);
  free(w);
  free(work2w);
  free(d);

  // returning calculated norm
  return dnorm;
}