Example #1
0
void lutsolve(Matrix A, Vector x, char uplo)
{
  char trans='N';
  char diag='N';
  int one=1;
  int info;
  dtrtrs(&uplo, &trans, &diag, &A->rows, &one,
         A->data[0], &A->rows, x->data, &A->rows, &info);
}
Example #2
0
/************************************
  Given the factorization LB = U for some B, solve the problem
  Bx = vec for x
  Solve using LAPACK functions.
************************************/
void LU_Solve1(PT_Matrix pL, PT_Matrix pUt, double *vec, double *x, ptrdiff_t *info)
{
	ptrdiff_t n, incx=1;
	char U='U', N='N',T='T';
	double alpha=1.0, beta=0.0;

	n = Matrix_Rows(pL);

	/* solve using lapack */
	/* compute x = L*vec */
	dgemv(&T, &n, &n, &alpha, pL->A, &(pL->rows_alloc), vec, &incx, &beta, x, &incx);

	/* solve U*xnew = x using lapack function that also checks for singularity */
	dtrtrs(&U, &N, &N, &n, &incx, pUt->A, &(pUt->rows_alloc), x, &n, info);

	/* printf("my x=\n"); */
	/* Vector_Print_raw(x,n); */
}
Example #3
0
double nmf_neals(double * a, double * w0, double * h0, int * pm, int * pn, \
		      int * pk, int * maxiter, const double * pTolX, const double * pTolFun) 
{

// code added to be able to call from R
int m = * pm;
int n = * pn;
int k = * pk;
const double TolX = * pTolX;
const double TolFun = * pTolFun;
// also: changed w0, h0 to simple pointer (instead of double)
// // end code added

#ifdef PROFILE_NMF_NEALS
	struct timeval start, end;
	gettimeofday(&start, 0);
#endif
#if DEBUG_LEVEL >= 2
	printf("Entering nmf_neals\n");
#endif


#ifdef ERROR_CHECKING
  errno = 0;
#endif


  double * help1 = (double*) malloc(sizeof(double)*k*k);
  double * help2 = (double*) malloc(sizeof(double)*k*n);
  double * help3 = (double*) malloc(sizeof(double)*k*m);

  //-----------------------------------------



  // definition of necessary dynamic data structures
  //...for calculating matrix h
  double* h = (double*) malloc(sizeof(double)*k*n);
  int* jpvt_h = (int*) malloc(sizeof(int)*k);
  int info;
  //...for calculating matrix w
   double* w = (double*) malloc(sizeof(double)*m*k);
  //----------------


  //...for calculating the norm of A-W*H
  double* d = (double*) malloc(sizeof(double)*m*n);					//d = a - w*h
  double dnorm0 = 0;
  double dnorm = 0;
  const double eps = dlamch('E');					//machine precision epsilon
  const double sqrteps = sqrt(eps);					//squareroot of epsilon
  

  //-------------------

#ifdef ERROR_CHECKING
  if (errno) {
    perror("Error allocating memory in nmf_neals");
    free(help1);
    free(help2);
    free(help3);
    free(h);
    free(jpvt_h);
    free(w);
    free(d);
    return -1;
  }
#endif


  // declaration of data structures for switch to als algorithm
  // ----------------------------------------------------------

  int als_data_allocated = 0;					// indicates wheter data structures were already allocated
  // factor matrices for factorizing matrix w
  double * q;
  double * r;
  // factor matrices for factorizing matrix h
  double * q_h;
  double * r_h;

  double* tau_h;                   //stores elementary reflectors of factor matrix Q
  double* work_w;            //work array for factorization of matrix w
  int lwork_w;
  double* work_h;            //work array for factorization of matrix h  
  int lwork_h;
  double * work_qta;	     //work array for dorgqr
  int lwork_qta;
  double * work_qth;	     //work array for dorgqr
  int lwork_qth;

  //query for optimal workspace size for routine dgeqp3...
  double querysize;
  


  

  
  //Loop-Indices
  int iter, i;

  //variable for storing if fallback happened in current iteration
  int fallback;

  // factorisation step in a loop from 1 to maxiter
  for (iter = 1; iter <= *maxiter; ++iter) {

    //no fallback in this iteration so far
    fallback = 0;

    // calculating matrix h
    //----------------
    //help1 = w0'*w0
    dgemm('T', 'N', k, k, m, 1.0, w0, m, w0, m, 0., help1, k);
    //help2 = w0'*a
    dgemm('T', 'N', k, n, m, 1.0, w0, m, a, m, 0., help2, k);
    //LU-Factorisation of help1 to solve equation help1 * x = help2
    dgesv(k, n, help1, k, jpvt_h, help2, k, &info);
    // if factor matrix U is singular -> switch back to als algorithm to compute h
    if( info > 0) {

	//set fallback to 1 to  indicate that fallback happened
	fallback = 1;

	// do dynamic data structures need to be allocated?
	if (!als_data_allocated) {
	  als_data_allocated = 1;

  	  // factor matrices for factorizing matrix w
	  q = (double*) malloc(sizeof(double)*m*k);
	  r = (double*) malloc(sizeof(double)*m*k);
	  // factor matrices for factorizing matrix h
	  q_h = (double*) malloc(sizeof(double)*n*k);
	  r_h = (double*) malloc(sizeof(double)*n*k);

	  tau_h = (double*) malloc(sizeof(double)*k);                   //stores elementary reflectors of factor matrix Q


          //query for optimal workspace size for routine dgeqp3...
	  //for matrix w
	  dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info);
	  lwork_w = (int) querysize;
	  work_w = (double*) malloc(sizeof(double)*lwork_w);            //work array for factorization of matrix help1 (dgeqp3)
	  //for matrix h
	  dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info);
	  lwork_h = (int) querysize;
	  work_h = (double*) malloc(sizeof(double)*lwork_h);            //work array for factorization of matrix h


	  //query for optimal workspace size for routine dorgqr...
	  //for matrix w
	  dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info);
	  lwork_qta = (int)querysize;
	  work_qta = (double*) malloc(sizeof(double)*lwork_qta);	  //work array for dorgqr
	  //for matrix h
	  dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info);
	  lwork_qth = (int)querysize;
	  work_qth = (double*) malloc(sizeof(double)*lwork_qth);

	}


        // calculating matrix h
        //----------------
        //re-initialization

        //copy *w0 to q
        dlacpy('A', m, k, w0, m, q, m);


        //initialise jpvt_h to 0 -> every column free
        for (i = 0; i<k; ++i)
          jpvt_h[i] = 0;

        // Q-R factorization with column pivoting
        dgeqp3(m, k, q, m, jpvt_h, tau_h, work_w, lwork_w, &info);


        //copying upper triangular factor-matrix r out of q into r
        dlacpy('U', m, k, q, m, r, k);


        //Begin of least-squares-solution to w0 * x = a

        //generate explicit matrix q (m times k) and calculate q' * a
        dorgqr(m, k, k, q, m, tau_h, work_qta, lwork_qta, &info);
        dgemm('T', 'N', k, n, m, 1.0, q, m, a, m, 0.0, q_h, k);


        //solve R * x = (Q'*A)
        dtrtrs('U','N','N',k,n,r,k,q_h,k,&info);

        //copy matrix q to h, but permutated according to jpvt_h
        for (i=0; i<k; ++i) {
          dcopy(n, q_h + i, k, h + jpvt_h[i] - 1, k);
        }

        //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint
        for (i=0; i<k*n; ++i) {
        if (h[i] < ZERO_THRESHOLD)
          h[i] = 0.;
        }


    }
    else {
      //h = max(ZERO_THRESHOLD, help1\help2)
      for (i=0; i < k*n; ++i)
        h[i] = (help2[i] > ZERO_THRESHOLD ? help2[i] : 0.);
    }


	    // calculating matrix w = max(0, help1\help3)'
	    //----------------------------
	    //help1 = h*h'
	    dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., help1, k);
	    //help3 = h*a'
	    dgemm('N', 'T', k, m, n, 1.0, h, k, a, m, 0., help3, k);
	    //LU-Factorisation of help1
	    dgesv(k, m, help1, k, jpvt_h, help3, k, &info);
	    //
	    if( info > 0) {
		// do dynamic data structures need to be allocated?
	        if (!als_data_allocated) {
	          als_data_allocated = 1;

	          // factor matrices for factorizing matrix w
	          q = (double*) malloc(sizeof(double)*m*k);
	          r = (double*) malloc(sizeof(double)*m*k);
	          // factor matrices for factorizing matrix h
	          q_h = (double*) malloc(sizeof(double)*n*k);
	          r_h = (double*) malloc(sizeof(double)*n*k);
	
	          tau_h = (double*) malloc(sizeof(double)*k);                   //stores elementary reflectors of factor matrix Q
	
	
	          //query for optimal workspace size for routine dgeqp3...
		
		  //for matrix w
        	  dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info);
	          lwork_w = (int) querysize;
        	  work_w = (double*) malloc(sizeof(double)*lwork_w);            //work array for factorization of matrix help1 (dgeqp3)
	
		  //..for matrix h
		  dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info);
		  lwork_h = (int) querysize;
		  work_h = (double*) malloc(sizeof(double)*lwork_h);            //work array for factorization of matrix h
	
	
        	  //query for optimal workspace size for routine dorgqr...
		  //for matrix w
	          dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info);
	          lwork_qta = (int)querysize;
	          work_qta = (double*) malloc(sizeof(double)*lwork_qta);          //work array for dorgqr
		  // ... for matrix h
		  dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info);
		  lwork_qth = (int)querysize;
		  work_qth = (double*) malloc(sizeof(double)*lwork_qth);
	  
	        } 

	
		//calculating matrix w
	        //copy original matrix h to q_h, but transposed
	        for (i=0; i<k; ++i) {
	          dcopy(n, h + i, k, q_h + i*n, 1);
	        }


        	//initialise jpvt_a to 0 -> every column free
	        for (i = 0; i<k; ++i)
	          jpvt_h[i] = 0;
	
        	//Q-R factorization
	        dgeqp3(n, k, q_h, n, jpvt_h, tau_h, work_h, lwork_h, &info);


	        //copying upper triangular factor-matrix r_h out of q into r_h
	        dlacpy('U', n, k, q_h, n, r_h, k);
	

	        //Begin of least-squares-solution to w0 * x = a
 
	        //generate explicit matrix q (n times k) and calculate *a = q' * a'
	        dorgqr(n, k, k, q_h, n, tau_h, work_qth, lwork_qth, &info);
	        dgemm('T', 'T', k, m, n, 1.0, q_h, n, a, m, 0.0, q, k);




	        //solve R_h * x = (Q'*A')
	        dtrtrs('U', 'N', 'N', k, m, r_h, k, q, k, &info);


        	//jpvt_h*(R\(Q'*A')) permutation and transposed copy to w
	        for (i=0; i<k; ++i) {
	          dcopy(m, q + i, k, w + m * (jpvt_h[i] - 1), 1);
        	}

	        //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint
        	for (i=0; i<k*m; ++i) {
	          if (w[i] < ZERO_THRESHOLD)
	        	    w[i] = 0.;
        	}


	    }

	    else {
	        //w = max(0, help3)'
	        for (i=0; i<k; ++i) {
	          dcopy(m, help3 + i, k, w + i*m, 1);
	        }
	        for (i=0; i<m*k; ++i) {
	          if (w[i] < ZERO_THRESHOLD)
	            w[i] = 0.;
	        }
	    }




    
    // calculating the norm of D = A-W*H
    dnorm = calculateNorm(a, w, h, d, m, n, k);

    
    // calculating change in w -> dw
    //----------------------------------
    double dw;
    dw = calculateMaxchange(w, w0, m, k, sqrteps);

    
    // calculating change in h -> dh
    //-----------------------------------
    double dh;
    dh = calculateMaxchange(h, h0, k, n, sqrteps);

    //Max-Change = max(dh, dw) = delta
    double delta;
    delta = (dh > dw) ? dh : dw;

   

    // storing the matrix results of the current iteration
    swap(&w0, &w);
    swap(&h0, &h);



    // storing the norm results of the current iteration
    dnorm0 = dnorm;


#if DEBUG_LEVEL >= 1
  printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta);
#endif   


    //Check for Convergence
    if (iter > 1) {
      if (delta < TolX) {
        *maxiter = iter;
        break;
      }
      else
        if (dnorm <= TolFun*dnorm0) {
        *maxiter = iter;
        break;
        }


    }
   
  } //end of loop from 1 to maxiter

#if DEBUG_LEVEL >= 2
	printf("Exiting nmf_neals\n");
#endif
#ifdef PROFILE_NMF_NEALS
	gettimeofday(&end, 0);
	outputTiming("", start, end);
#endif

  // freeing memory if used
    free(help1);
    free(help2);
    free(help3);
    free(h);
    free(jpvt_h);
    free(w);
    free(d);

    if(als_data_allocated) {
      free(q);
      free(r);
      free(q_h);
      free(r_h);
      free(work_h);
      free(work_w);
      free(tau_h);
      free(work_qta);
      free(work_qth);
    }

  // returning calculated norm
  return dnorm;
}
// Sample factor vectors
// Function written from perspective of sampling user factor vectors with cross-topics
// Switch roles of user-item inputs to sample item factor vectors
void sampleTopicFactorVectors(uint32_t* items, double* resids, const mxArray* exampsByUser,
			      int KU, int KM, int numUsers, int numItems, double invSigmaSqd, 
			      ptrdiff_t numTopicFacs, double* LambdaU, double* muU, double* c, double* d, 
			      uint32_t* zU, uint32_t* zM){
   // Array of random number generators
   gsl_rng** rngs = getRngArray();  
 
   // Extract internals of jagged arrays
   uint32_t** userExamps;
   mwSize* userLens;
   unpackJagged(exampsByUser, &userExamps, &userLens, numUsers);

   ptrdiff_t numTopicFacsSqd = numTopicFacs*numTopicFacs;
   ptrdiff_t numTopicFacsTimesNumItems = numTopicFacs*numItems;
   ptrdiff_t numTopicFacsTimesNumUsers = numTopicFacs*numUsers;

   // BLAS constants
   char uplo[] = "U";
   char trans[] = "N";
   char diag[] = "N";
   ptrdiff_t oneInt = 1;
   double oneDbl = 1;
   double zeroDbl = 0;

   // Compute muBase = LambdaU*muU
   double* muBase = mxMalloc(numTopicFacs*sizeof(*muBase));
   dsymv(uplo, &numTopicFacs, &oneDbl, LambdaU, &numTopicFacs, muU, &oneInt, &zeroDbl, muBase, &oneInt);

   // Allocate memory for new mean and precision parameters
   double** muNew[MAX_NUM_THREADS];
   double** LambdaNew[MAX_NUM_THREADS];
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++){
      muNew[thread] = mxMalloc(KM*sizeof(**muNew));
      LambdaNew[thread] = mxMalloc(KM*sizeof(**LambdaNew));
      for(int i = 0; i < KM; i++){
	 muNew[thread][i] = mxMalloc(numTopicFacs*sizeof(***muNew));
	 LambdaNew[thread][i] = mxMalloc(numTopicFacsSqd*sizeof(***LambdaNew));
      }
   }

#pragma omp parallel for
   for(int u = 0; u < numUsers; u++){
      int thread = omp_get_thread_num();
      for(int i = 0; i < KM; i++){
	 // Initialize new mean to muBase
	 dcopy(&numTopicFacs, muBase, &oneInt, muNew[thread][i], &oneInt);
	 // Initialize new precision to LambdaU
	 dcopy(&numTopicFacsSqd, LambdaU, &oneInt, LambdaNew[thread][i], &oneInt);
      }

      // Iterate over user's examples
      mxArray* exampsArray = mxGetCell(exampsByUser, u);
      mwSize len = mxGetN(exampsArray);
      uint32_t* examps = (uint32_t*) mxGetData(exampsArray);
      for(int j = 0; j < len; j++){
	 uint32_t e = examps[j]-1;
	 int m = items[e]-1;
	 int userTop = zU[e]-1;
	 int itemTop = zM[e]-1;

	 // Item vector for this rated item
	 double* dVec = d + m*numTopicFacs + userTop*numTopicFacsTimesNumItems;

	 // Compute posterior sufficient statistics for factor vector
	 // Add resid * dVec/sigmaSqd to muNew
	 double resid = resids[e];
	 resid *= invSigmaSqd;
	 daxpy(&numTopicFacs, &resid, dVec, &oneInt, muNew[thread][itemTop], &oneInt);

	 // Add (dVec * dVec^t)/sigmaSqd to LambdaNew
	 // Exploit symmetric structure of LambdaNew
	 dsyr(uplo, &numTopicFacs, &invSigmaSqd, dVec, &oneInt, LambdaNew[thread][itemTop], 
	      &numTopicFacs);
      }
      
      for(int i = 0; i < KM; i++){
	 // Compute upper Cholesky factor of LambdaNew
	 ptrdiff_t info;
	 dpotrf(uplo, &numTopicFacs, LambdaNew[thread][i], &numTopicFacs, &info);
	 
	 // Solve for (LambdaNew)^-1*muNew using Cholesky factor
	 dpotrs(uplo, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, muNew[thread][i], 
		&numTopicFacs, &info);
	 
	 // Sample vector of N(0,1) variables
	 gsl_rng* rng = rngs[thread];
	 double* cVec = c + u*numTopicFacs + i*numTopicFacsTimesNumUsers;
	 for(int f = 0; f < numTopicFacs; f++)
	    cVec[f] = gsl_ran_gaussian(rng, 1);
	 
	 // Solve for (chol(LambdaNew,'U'))^-1*N(0,1)
	 dtrtrs(uplo, trans, diag, &numTopicFacs, &oneInt, LambdaNew[thread][i], 
		&numTopicFacs, cVec, &numTopicFacs, &info);
	 
	 // Add muNew to aVec
	 daxpy(&numTopicFacs, &oneDbl, muNew[thread][i], &oneInt, cVec, &oneInt);
      }
   }
   // Clean up
   mxFree(userExamps);
   mxFree(userLens);
   mxFree(muBase);
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++){
      for(int i = 0; i < KM; i++){
	 mxFree(muNew[thread][i]);
	 mxFree(LambdaNew[thread][i]);
      }
      mxFree(muNew[thread]);
      mxFree(LambdaNew[thread]);
   }
}
Example #5
0
int main(){
  double *A, *A2, *b, *b2, *L, *T, *P, *D, *SD1, *SD2, *PT, *WORK;
  int SIZE, m, POSX, POSY, NUML, NMBR, i, j, NRHS=1, INFO, *IPIV, NUMM;
  char NAME[20], FNAM[30], UPLO='L', TRANS='N', DIAG='U';
  FILE *FILI,*FILI2,*FILO, *FIL3;

    FIL3 = fopen("INPUT.dat","r");
// LECTURE DU NOMBRE DE MATRICE A TRAITER
    fscanf(FIL3,"%d",&NMBR);
    for(NUMM = 0; NUMM<NMBR;NUMM++){
// LECTURE DU NOM DE LA MATRICE ACTUELLE ET OUVERTURE DES FICHIERS
        fscanf(FIL3,"%s",NAME);
        FILI = fopen(NAME,"r");
        if(FILI==NULL){
            printf("Fichier %s introuvable !!!\n",NAME);
            continue;
        }
        sprintf(HEAD,"rhs_%s.dat",NAME);
        FILI2 = fopen(FNAM,"r");
        sprintf(HEAD,"result_%s.dat",NAME);
        FILO = fopen(FNAM,"w+");

// ALLOCATION DES DIFFERENTS TABLEAUX
        fscanf(FILI,"%d %d %d",&SIZE, &m, &NUML);
        A    = (double *) calloc(SIZE*SIZE,sizeof(double));
        A2   = (double *) calloc(SIZE*SIZE,sizeof(double));
        b    = (double *) calloc(SIZE,sizeof(double));
		b2   = (double *) calloc(SIZE,sizeof(double));
        P    = (double *) calloc(SIZE*SIZE,sizeof(double));
        PT   = (double *) calloc(SIZE*SIZE,sizeof(double));    
        L    = (double *) calloc(SIZE*SIZE,sizeof(double));
        T    = (double *) calloc(SIZE*SIZE,sizeof(double));
        D    = (double *) calloc(SIZE,sizeof(double));
        SD1  = (double *) calloc(SIZE-1,sizeof(double));
        SD2  = (double *) calloc(SIZE-1,sizeof(double));
        WORK = (double *) calloc(SIZE,sizeof(double));
        IPIV = (int*)     calloc(SIZE,sizeof(int));    

// LECTURE DES VALEURS DE A
        for(i=0;i<NUML;i++){
            fscanf(FILI,"%d %d",&POSX, &POSY);
            fscanf(FILI,"%lf",A+POSX-1+SIZE*(POSY-1));
            A2[(POSX-1)+SIZE*(POSY-1)] = A[(POSX-1)+SIZE*(POSY-1)];
        }

// LECTURE DES VALEURS DE b
        for(i=0;i<SIZE;i++){
            fscanf(FILI2,"%lf",b+i);
            b2[i] = b[i];
        }

// METHODE DE AASEN
//   CALCUL DES MATRICES T,L ET P
        aasen(SIZE,A,T,L,P);

//   RESOLUTION DES AUTRES EQUATIONS
        b = gaxPD(P,SIZE,SIZE,b);
        for(i=0;i<SIZE;i++) D[i] = T[i+SIZE*i];
        for(i=0;i<SIZE-1;i++) SD1[i] = T[i+SIZE*(i+1)], SD2[i] = T[i+1+SIZE*i];
        for(i=0;i<SIZE;i++) for(j=0;j<SIZE;j++) PT[i+SIZE*j]=P[j+SIZE*i];

        dtrtrs(&UPLO, &TRANS, &DIAG, &SIZE, &NRHS, L, &SIZE, b, &SIZE, &INFO);
        dgtsv(&SIZE, &NRHS, SD1, D, SD2, b, &SIZE, &INFO);
        TRANS = 'T';
        dtrtrs(&UPLO,&TRANS,&DIAG,&SIZE,&NRHS,L,&SIZE,b,&SIZE,&INFO);

//   CALCUL DE LA SOLUTION FINALE
        b = gaxPD(PT,SIZE,SIZE,b);

// METHODE DE BUNCH-PARLETT
        dsytrf_(&UPLO, &SIZE, A2, &SIZE, IPIV, WORK, &SIZE, &INFO);
        dsytrs_(&UPLO, &SIZE, &NRHS, A2, &SIZE, IPIV, b2, &SIZE, &INFO);

// ECRITURE DANS LE FICHIER DE SORTIE
        for(i=0;i<SIZE;i++) fprintf(FILO,"%lf\t%lf\SIZE",b[i],b2[i]);

// LIBERATION DE LA MEMOIRE ET FERMETURE DES FICHIERS
        free(A);free(b);free(A2);free(b2);free(P);free(PT);free(L);free(T);free(D);free(SD1);
        free(SD2);
        fclose(FILI); fclose(FILI2); fclose(FILO);
    }
    return(0);
}