Example #1
// Scratch buffers for the pivoted-QR least-squares fallback used by nmf_neals
// when the normal-equation solve (dgesv) reports a singular matrix.
struct neals_qr_ws {
  double * q;          // m*k doubles: copy of w0 / its QR factors; also receives Q'*A' (k x m)
  double * r;          // upper-triangular factor taken from the factorisation of w0
  double * q_h;        // n*k doubles: h transposed / its QR factors; also receives Q'*A (k x n)
  double * r_h;        // upper-triangular factor taken from the factorisation of h'
  double * tau_h;      // elementary reflectors produced by dgeqp3
  double * work_w;     // dgeqp3 work array for the m x k factorisation
  double * work_h;     // dgeqp3 work array for the n x k factorisation
  double * work_qta;   // dorgqr work array for the m x k case
  double * work_qth;   // dorgqr work array for the n x k case
  int lwork_w;
  int lwork_h;
  int lwork_qta;
  int lwork_qth;
};

// Releases every buffer held by *ws (safe on a zero-initialised or partially filled workspace).
static void neals_qr_ws_free(struct neals_qr_ws * ws)
{
  free(ws->q);
  free(ws->r);
  free(ws->q_h);
  free(ws->r_h);
  free(ws->tau_h);
  free(ws->work_w);
  free(ws->work_h);
  free(ws->work_qta);
  free(ws->work_qth);
}

// Allocates the fallback workspace, sizing the work arrays with lwork = -1 queries.
// Returns 0 on success, -1 if any allocation failed (caller releases with neals_qr_ws_free).
static int neals_qr_ws_alloc(struct neals_qr_ws * ws, int m, int n, int k, int * jpvt)
{
  double querysize;
  int info;

  // factor matrices for factorizing matrix w
  ws->q = (double*) malloc(sizeof(double)*m*k);
  ws->r = (double*) malloc(sizeof(double)*m*k);
  // factor matrices for factorizing matrix h
  ws->q_h = (double*) malloc(sizeof(double)*n*k);
  ws->r_h = (double*) malloc(sizeof(double)*n*k);
  ws->tau_h = (double*) malloc(sizeof(double)*k);
  if (!ws->q || !ws->r || !ws->q_h || !ws->r_h || !ws->tau_h)
    return -1;

  // query for optimal workspace size for routine dgeqp3...
  // ...for matrix w
  dgeqp3(m, k, ws->q, m, jpvt, ws->tau_h, &querysize, -1, &info);
  ws->lwork_w = (int) querysize;
  ws->work_w = (double*) malloc(sizeof(double)*ws->lwork_w);
  // ...for matrix h
  dgeqp3(n, k, ws->q_h, n, jpvt, ws->tau_h, &querysize, -1, &info);
  ws->lwork_h = (int) querysize;
  ws->work_h = (double*) malloc(sizeof(double)*ws->lwork_h);

  // query for optimal workspace size for routine dorgqr...
  // ...for matrix w
  dorgqr(m, k, k, ws->q, m, ws->tau_h, &querysize, -1, &info);
  ws->lwork_qta = (int) querysize;
  ws->work_qta = (double*) malloc(sizeof(double)*ws->lwork_qta);
  // ...for matrix h
  dorgqr(n, k, k, ws->q_h, n, ws->tau_h, &querysize, -1, &info);
  ws->lwork_qth = (int) querysize;
  ws->work_qth = (double*) malloc(sizeof(double)*ws->lwork_qth);

  if (!ws->work_w || !ws->work_h || !ws->work_qta || !ws->work_qth)
    return -1;
  return 0;
}

// nmf_neals - alternating least squares NMF via the normal equations, with a
// pivoted-QR least-squares fallback whenever dgesv reports a singular matrix.
//
// Scalars are passed by pointer so the routine can be called from R.
//
//   a        m x n input matrix (leading dimension m in every BLAS call below)
//   w0       m x k start factor; receives the final factor w on return
//   h0       k x n start factor; receives the final factor h on return
//   pm,pn,pk pointers to the dimensions m, n, k
//   maxiter  in: maximum number of iterations; out: set to the iteration at
//            which a convergence criterion was met (unchanged otherwise)
//   pTolX    pointer to the tolerance on the maximum change of w and h
//   pTolFun  pointer to the tolerance used in the test dnorm <= TolFun*dnorm0
//
// Returns the value computed by calculateNorm for the last iteration
// (0 if no iteration ran), or -1 if memory could not be allocated.
double nmf_neals(double * a, double * w0, double * h0, int * pm, int * pn,
		      int * pk, int * maxiter, const double * pTolX, const double * pTolFun)
{
  int m = * pm;
  int n = * pn;
  int k = * pk;
  const double TolX = * pTolX;
  const double TolFun = * pTolFun;

  // w0/h0 are plain pointers here, so the swaps at the end of each iteration
  // only exchange local copies. Remember the caller's buffers so the final
  // factors can be copied back and so that only local buffers are ever freed.
  double * const w_caller = w0;
  double * const h_caller = h0;

  struct timeval start, end;
  gettimeofday(&start, 0);
#if DEBUG_LEVEL >= 2
  printf("Entering nmf_neals\n");
#endif

  double * help1 = (double*) malloc(sizeof(double)*k*k);
  double * help2 = (double*) malloc(sizeof(double)*k*n);
  double * help3 = (double*) malloc(sizeof(double)*k*m);

  // definition of necessary dynamic data structures
  //...for calculating matrix h
  double * h_local = (double*) malloc(sizeof(double)*k*n);
  int * jpvt_h = (int*) malloc(sizeof(int)*k);
  int info;
  //...for calculating matrix w
  double * w_local = (double*) malloc(sizeof(double)*m*k);
  //...for calculating the norm of A-W*H
  double * d = (double*) malloc(sizeof(double)*m*n);			//d = a - w*h

  // malloc is checked directly: the C standard does not require it to set errno
  if (!help1 || !help2 || !help3 || !h_local || !jpvt_h || !w_local || !d) {
    perror("Error allocating memory in nmf_neals");
    free(help1);
    free(help2);
    free(help3);
    free(h_local);
    free(jpvt_h);
    free(w_local);
    free(d);
    return -1;
  }

  double * h = h_local;
  double * w = w_local;

  double dnorm0 = 0;
  double dnorm = 0;
  const double eps = dlamch('E');					//machine precision epsilon
  const double sqrteps = sqrt(eps);					//squareroot of epsilon

  // same default as nmf_mu: machine epsilon unless ZERO_THRESHOLD is predefined
#ifndef ZERO_THRESHOLD
  const double ZERO_THRESHOLD = eps;
#endif

  // data structures for the switch to the als (QR) algorithm, allocated lazily
  struct neals_qr_ws ws = {0};
  int als_data_allocated = 0;
  int failed = 0;

  int iter, i;
  double dw, dh, delta;

  // factorisation step in a loop from 1 to maxiter
  for (iter = 1; iter <= *maxiter; ++iter) {

    // ---- calculating matrix h ----
    //help1 = w0'*w0
    dgemm('T', 'N', k, k, m, 1.0, w0, m, w0, m, 0., help1, k);
    //help2 = w0'*a
    dgemm('T', 'N', k, n, m, 1.0, w0, m, a, m, 0., help2, k);
    //LU-Factorisation of help1 to solve equation help1 * x = help2
    dgesv(k, n, help1, k, jpvt_h, help2, k, &info);

    if (info > 0) {
      // factor matrix U is singular -> fall back to the als algorithm to compute h
      if (!als_data_allocated) {
        if (neals_qr_ws_alloc(&ws, m, n, k, jpvt_h) != 0) {
          perror("Error allocating memory in nmf_neals");
          failed = 1;
          break;
        }
        als_data_allocated = 1;
      }

      //copy w0 to q
      dlacpy('A', m, k, w0, m, ws.q, m);

      //initialise jpvt_h to 0 -> every column free
      for (i = 0; i < k; ++i)
        jpvt_h[i] = 0;

      // Q-R factorization with column pivoting
      dgeqp3(m, k, ws.q, m, jpvt_h, ws.tau_h, ws.work_w, ws.lwork_w, &info);

      //copying upper triangular factor-matrix r out of q into r
      dlacpy('U', m, k, ws.q, m, ws.r, k);

      //least-squares-solution to w0 * x = a:
      //generate explicit matrix q (m times k) and calculate q' * a
      dorgqr(m, k, k, ws.q, m, ws.tau_h, ws.work_qta, ws.lwork_qta, &info);
      dgemm('T', 'N', k, n, m, 1.0, ws.q, m, a, m, 0.0, ws.q_h, k);

      //solve R * x = (Q'*A); mirrors the triangular solve of the w fallback
      //below (without it r is never used and h would just be permuted Q'*A)
      dtrtrs('U', 'N', 'N', k, n, ws.r, k, ws.q_h, k, &info);

      //copy the solution to h, rows permuted according to jpvt_h
      for (i = 0; i < k; ++i) {
        dcopy(n, ws.q_h + i, k, h + jpvt_h[i] - 1, k);
      }

      //set negative and very small positive values to zero to keep the non-negativity constraint
      for (i = 0; i < k*n; ++i) {
        if (h[i] < ZERO_THRESHOLD)
          h[i] = 0.;
      }
    }
    else {
      //h = max(ZERO_THRESHOLD, help1\help2)
      for (i = 0; i < k*n; ++i)
        h[i] = (help2[i] > ZERO_THRESHOLD ? help2[i] : 0.);
    }

    // ---- calculating matrix w = max(0, help1\help3)' ----
    //help1 = h*h'
    dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., help1, k);
    //help3 = h*a'
    dgemm('N', 'T', k, m, n, 1.0, h, k, a, m, 0., help3, k);
    //LU-Factorisation of help1
    dgesv(k, m, help1, k, jpvt_h, help3, k, &info);

    if (info > 0) {
      if (!als_data_allocated) {
        if (neals_qr_ws_alloc(&ws, m, n, k, jpvt_h) != 0) {
          perror("Error allocating memory in nmf_neals");
          failed = 1;
          break;
        }
        als_data_allocated = 1;
      }

      //copy matrix h to q_h, but transposed
      for (i = 0; i < k; ++i) {
        dcopy(n, h + i, k, ws.q_h + i*n, 1);
      }

      //initialise jpvt_h to 0 -> every column free
      for (i = 0; i < k; ++i)
        jpvt_h[i] = 0;

      //Q-R factorization
      dgeqp3(n, k, ws.q_h, n, jpvt_h, ws.tau_h, ws.work_h, ws.lwork_h, &info);

      //copying upper triangular factor-matrix r_h out of q_h into r_h
      dlacpy('U', n, k, ws.q_h, n, ws.r_h, k);

      //generate explicit matrix q (n times k) and calculate q' * a'
      dorgqr(n, k, k, ws.q_h, n, ws.tau_h, ws.work_qth, ws.lwork_qth, &info);
      dgemm('T', 'T', k, m, n, 1.0, ws.q_h, n, a, m, 0.0, ws.q, k);

      //solve R_h * x = (Q'*A')
      dtrtrs('U', 'N', 'N', k, m, ws.r_h, k, ws.q, k, &info);

      //jpvt_h*(R\(Q'*A')) permutation and transposed copy to w
      for (i = 0; i < k; ++i) {
        dcopy(m, ws.q + i, k, w + m * (jpvt_h[i] - 1), 1);
      }

      //set negative and very small positive values to zero to keep the non-negativity constraint
      for (i = 0; i < k*m; ++i) {
        if (w[i] < ZERO_THRESHOLD)
          w[i] = 0.;
      }
    }
    else {
      //w = max(0, help3)'
      for (i = 0; i < k; ++i) {
        dcopy(m, help3 + i, k, w + i*m, 1);
      }
      for (i = 0; i < m*k; ++i) {
        if (w[i] < ZERO_THRESHOLD)
          w[i] = 0.;
      }
    }

    // calculating the norm of D = A-W*H
    dnorm = calculateNorm(a, w, h, d, m, n, k);

    // Max-Change = max(dh, dw) = delta
    dw = calculateMaxchange(w, w0, m, k, sqrteps);
    dh = calculateMaxchange(h, h0, k, n, sqrteps);
    delta = (dh > dw) ? dh : dw;

    // the results of this iteration become the start values of the next one
    swap(&w0, &w);
    swap(&h0, &h);

#if DEBUG_LEVEL >= 1
    printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta);
#endif

    //Check for Convergence. Lowering *maxiter to iter makes the loop condition
    //fail after this pass, so no explicit break is needed.
    if (iter > 1) {
      if (delta < TolX) {
        *maxiter = iter;
      }
      else if (dnorm <= TolFun*dnorm0) {
        *maxiter = iter;
      }
    }

    // store the norm only after the test above (same order as nmf_mu);
    // updating it earlier would compare dnorm against itself
    dnorm0 = dnorm;

  } //end of loop from 1 to maxiter

  // After an odd number of iterations the latest factors live in the local
  // buffers; copy them into the caller's arrays.
  if (!failed) {
    if (w0 != w_caller) {
      for (i = 0; i < m*k; ++i)
        w_caller[i] = w0[i];
    }
    if (h0 != h_caller) {
      for (i = 0; i < k*n; ++i)
        h_caller[i] = h0[i];
    }
  }

#if DEBUG_LEVEL >= 2
  printf("Exiting nmf_neals\n");
  gettimeofday(&end, 0);
  outputTiming("", start, end);
#endif

  // freeing memory: only buffers allocated in this function, never the caller's
  free(help1);
  free(help2);
  free(help3);
  free(h_local);
  free(jpvt_h);
  free(w_local);
  free(d);
  neals_qr_ws_free(&ws);			// no-op if the fallback was never needed

  // returning calculated norm
  return failed ? -1 : dnorm;
}
Example #2
int main( int argc, char  **argv ) {

     char *progname = argv[0];
     FILE *in ,  *out;
     char opt;
     int i;
     double startTime;

     C = 0;
     kernelType =  -1;
     degree = 0;
     sigmaSqr = 0;
     binaryFeature = 0;

     /* Check command line options */
     optarg = NULL;
     while (((opt = getopt(argc, argv, OPTSTRING)) != -1)) {
            switch(opt)  {
            case 't':
            if ( strcmp(optarg, "0") == 0 ) kernelType = 0;
            else if ( strcmp(optarg, "1")  == 0 ) kernelType = 1;
            else if ( strcmp(optarg, "2")  == 0 ) kernelType = 2;
            else {
                   fprintf( stderr, "kernel type is either 0,1 or 2\n");
            case 'c':
            if( sscanf(optarg, "%f", &C) == 0 ) {
                fprintf(stderr,"Expect  a positive number for C.\n");
            } else
                C = atof(optarg);
              if( C  <= 0 ) {
                 fprintf( stderr, "C has to be > 0\n");
              case 'd':
                if( sscanf(optarg, "%d", &degree) == 0 ) {
                   fprintf( stderr, "Expect degree to be a positive integer.\n");
                } else degree = atoi(optarg);
                if ( degree <= 0 ) {
                  fprintf( stderr, "degree has to be a positive integer.\n");
                case 'v':
                if( sscanf(optarg, "%f", &sigmaSqr) == 0 ) {
                    fprintf(stderr,"Expect  a positive number for variance.\n");
                } else sigmaSqr = atof(optarg);
                  if( sigmaSqr  <= 0 ) {
                     fprintf( stderr, "variance has to be > 0\n");

                  rbfConstant = 1/(2*sigmaSqr);
                  case 'b':
                  if( sscanf(optarg, "%d", &binaryFeature) == 0 ) {
                     fprintf( stderr, "binaryFeature option is either 0 or 1.\n");
                  } else binaryFeature = atoi(optarg);
                if ( binaryFeature != 0 && binaryFeature != 1 ) {
                    fprintf( stderr, "binaryFeature option is either 0 or 1.\n");
                case 'h':
                useMsg( progname );
                  useMsg( progname );

          /* Check all necessary parameters are in */

          if( kernelType ==  -1 ) {
               fprintf( stderr, "Kernel type has not been specified.\n");
          } else if( kernelType == 1 && degree == 0 ) {
              fprintf( stderr, "Degree has not been specified.\n");
          } else if( kernelType == 2 && sigmaSqr == 0 ){
                fprintf( stderr, "Variance has not been specified.\n");
          } else if( C == 0 ) C = DEFAULT;

        /* Check training file and model file */
         printf("INPUT FILE %s\n", argv[argc-2]);
          if (( in = fopen( argv[argc-2], "r") ) == NULL ) {
               fprintf( stderr, "Can't open %s\n",  argv[argc-2] );

          if (( out = fopen( argv[argc-1], "w") ) == NULL ) {
              fprintf( stderr, "Can't open %s\n",  argv[argc-1] );

         printf("smo_learn is preparing to learn. . .\n");
         if( ! readFile( in ) ) {
             fprintf( stderr, "Error in initializing. Program exits.\n" );
             exit (1);
         } else fclose( in );

         if( !initializeTraining()) {
             fprintf( stderr, "Error in initializing data structure. Program exits.\n");

         printf("Start training . . .\n");
         startTime = clock()/CLOCKS_PER_SEC;
         printf("Training is completed\n");

         /* Print training statistics */
         printf("CPU time is %f secs\n", clock()/CLOCKS_PER_SEC-startTime);
         printf("Writing training results . . .\n");
         writeModel( out );
         fclose( out );
         printf("Finish writing training results.\n");
         printf("no of iteration is %f\n",  iteration);
         printf("threshold b is %f\n", getb());
         if ( kernelType == 0 )
                printf("norm of weight vector is %f\n",  calculateNorm());
         printf("no. of unBound multipliers is %d\n",  unBoundSv );
         printf("no. of bounded multipliers is %d\n",  boundSv );

         /* Free memory */
         free( target );
         free( lambda );
         free( nonZeroFeature );
         free( error );
         free( nonBound );
         free( weight );
         free( unBoundIndex );
         free( nonZeroLambda );
         for( i = 0; i <= numExample; i++ ) {
            free( example[i] );
         free( example );
         free( errorCache );

         return 0;
double nmf_mu(double * a, double ** w0, double ** h0, int m, int n, \
		      int k, int * maxiter, const double TolX, const double TolFun) 

        struct timeval start, end;
        gettimeofday(&start, 0);

#if DEBUG_LEVEL >= 2
	printf("Entering nmf_mu\n");

  errno = 0;

  // definition of necessary dynamic data structures
  //...for calculating matrix h
  double* numerh = (double*) malloc(sizeof(double) *k*n);
  double* work1 = (double*) malloc(sizeof(double)*k*k);					// used for calculation of h & w
  double* work2 = (double*) malloc(sizeof(double)*k*n);
  double* h = (double*) malloc(sizeof(double)*k*n);

  //...for calculating matrix w
  double* numerw = (double*) malloc(sizeof(double)*m*k);
  double* work2w = (double*) malloc(sizeof(double)*m*k);
  double* w = (double*) malloc(sizeof(double)*m*k);

  //...for calculating the norm of A-W*H
  double* d = (double*) malloc(sizeof(double)*m*n);					//d = a - w*h
  double dnorm0 = 0.;
  double dnorm = 0.;
  const double eps = dlamch('E');					//machine precision epsilon
  const double sqrteps = sqrt(eps);					//squareroot of epsilon


  if (errno) {
    perror("Error allocating memory in nmf_mu");
    return -1;

//Is ZERO_THRESHOLD _not_ defined then use machine epsilon as default
  const double ZERO_THRESHOLD = eps;

  int iter, i;

  // factorisation step in a loop from 1 to maxiter
  for (iter = 1; iter <= *maxiter; ++iter) {

    // calculating matrix h
    // calculating numerh = w0'*a
    dgemm('T', 'N', k, n, m, 1.0, *w0, m, a, m, 0., numerh, k);  
    // calculating first intermediate result work1 = w0'*w0
    dgemm('T', 'N', k, k, m, 1.0, *w0, m, *w0, m, 0., work1, k);
    // calculating second intermediate result work2 = work1 * h0
    dgemm('N', 'N', k, n, k, 1.0, work1, k, *h0, k, 0., work2, k);

    //calculating elementwise matrixmultiplication, Division and addition h = h0 .* (numerh ./(work2 + eps))
    //set elements < zero_threshold to zero
    double tmp_element;
    for(i = 0; i< k*n; ++i) {
      if ( (*h0)[i] == 0. || numerh[i]  == 0.)
	h[i] = 0.;
      else {
	tmp_element = (*h0)[i] * (numerh[i] / (work2[i] + DIV_BY_ZERO_AVOIDANCE));
	h[i] = (tmp_element < ZERO_THRESHOLD) ? 0. : tmp_element;

    // calculating matrix w
    // calculating numerw = a*h'
    dgemm('N', 'T', m, k, n, 1.0, a, m, h, k, 0., numerw, m);
    // calculating first intermediate result work1 = h*h' (kxk-Matrix) => re-use of work1
    dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., work1, k);
    // calculating second intermediate result work2w = w0 * work1
    dgemm('N', 'N', m, k, k, 1.0, *w0, m, work1, k, 0., work2w, m);

    //calculating elementwise matrixmultiplication, Division and addition w = w0 .* (numerw ./ (work2w + eps))
    //set elements < zero_threshold to zero
    for(i = 0; i < m*k; ++i) {
      if ( (*w0)[i] == 0. || numerw[i] == 0.)
	w[i] = 0.;
      else {
	tmp_element = (*w0)[i] * (numerw[i] / (work2w[i] + DIV_BY_ZERO_AVOIDANCE));
	w[i] = (tmp_element < ZERO_THRESHOLD) ? 0. : tmp_element;

    // calculating the norm of D = A-W*H
    dnorm = calculateNorm(a, w, h, d, m, n, k);

    // calculating change in w -> dw
    double dw = calculateMaxchange(w, *w0, m, k, sqrteps);

    // calculating change in h -> dh
    double dh = calculateMaxchange(h, *h0, k, n, sqrteps);

    //Max-Change = max(dh, dw) = delta
    double delta = 0.0;
    delta = (dh > dw) ? dh : dw;

    // storing the matrix results of the current iteration in W0 respectively H0
    swap(w0, &w);
    swap(h0, &h);

    //Check for Convergence
    if (iter > 1) {
      if (delta < TolX) {
	*maxiter = iter;
	if (dnorm <= TolFun*dnorm0) {
	*maxiter = iter;


    // storing the norm result of the current iteration
    dnorm0 = dnorm;

#if DEBUG_LEVEL >= 1
  printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta);
  } //end of loop from 1 to maxiter

#if DEBUG_LEVEL >= 2
	printf("Exiting nmf_mu\n");
	gettimeofday(&end, 0);
	outputTiming("", start, end);

  // freeing memory if used

  // returning calculated norm
  return dnorm;