void solver (int n) { int i, lda, ldb, nrhs, info, *ipiv; double *a, *b; // Get matrix dimension and allocate arrays lda = n; ldb = n; nrhs = 1; a = (double *) malloc(sizeof(double) * n * n); b = (double *) malloc(sizeof(double) * n); ipiv = (int *) malloc(sizeof(int) * n); // Fill matrix a and vector b with random values for (i=0; i<n*n; i++) a[i] = drand48(); for (i=0; i<n; i++) b[i] = drand48(); dgesv(&n, &nrhs, a, &lda, ipiv, b, &ldb, &info); free(a); free(b); free(ipiv); return; }
int main(void) { int n=4096; int nrhs = n; int lda = n; int ldb = n; int info = 0; // Define matrix variables double *A; int *ipiv; double *B; // Allocate matrices A = (double*) malloc(lda*n*sizeof(double)); ipiv = (int*) malloc(n*sizeof(int)); B = (double*) malloc(ldb*nrhs*sizeof(double)); // Add some random data fillRandomDouble(lda, n, A, -10.0f, 10.0f); fillRandomDouble(ldb, nrhs, B, -10.0f, 10.0f); // Solve the system A*X=B using Intel MKL libraries dgesv(&n, &nrhs, A, &lda, ipiv, B, &ldb, &info); // Free the memory free(A); free(ipiv); free(B); }
void lusolve(Matrix A, Vector x, int** ipiv) { if (*ipiv == NULL) *ipiv = malloc(x->len*sizeof(int)); int one=1; int info; dgesv(&x->len,&one,A->data[0],&x->len,*ipiv,x->data,&x->len,&info); }
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) { /* mex interface to LAPACK functions dsysvx and zhesvx */ int m, n, bnrhs, lda, *ipiv, ldb, info; double *A,*b, *AT, *bT, *ret; int i; if (nrhs != 2) { mexErrMsgTxt("Expect 2 input arguments and return 1 output argument"); } A = mxGetPr(prhs[0]); b = mxGetPr(prhs[1]); m = mxGetM(prhs[0]); n = mxGetN(prhs[0]); bnrhs = mxGetN(prhs[1]); AT = (double *)mxCalloc(m*n,sizeof(double)); bT = (double *)mxCalloc(m*bnrhs,sizeof(double)); ipiv = (int *)mxCalloc(m,sizeof(int)); for(i=0;i<m*bnrhs;i++) { bT[i] = b[i]; } for(i=0;i<m*n;i++) AT[i] = A[i]; lda = m; ldb = m; dgesv(&m, &bnrhs, AT, &lda, ipiv, bT, &ldb, &info); plhs[0] = mxCreateDoubleMatrix(m,bnrhs,mxREAL); ret = mxGetPr(plhs[0]); for(i=0;i<m*bnrhs;i++) ret[i] = bT[i]; mxFree(AT); mxFree(bT); mxFree(ipiv); }
// ================================ // solve linear equations C = A\B // ================================ void solve(double* C, double* A, double* B, const int rA, const int cA, const int rB, const int cB, const char *mod, double *W, const int LW, ptrdiff_t *S) { #ifdef USE_BLAS int i, j, rank; char uplo = 'U', side = 'L', trans = 'N', unit = 'N'; double one = 1.0, rcond = 0.000000001; ptrdiff_t rA0 = rA, cA0 = cA, cB0 = cB, Lwork=LW, info; ptrdiff_t rC0 = (rA>cA) ? rA : cA; //ptrdiff_t ptr_S = S; switch (mod[0]){ case 'L': case 'U': uplo = mod[0]; dtrsm(&side, &uplo, &trans, &unit, &rC0, &cB0, &one, A, &rA0, C, &rC0); break; case 'P': dposv(&uplo, &rA0, &cB0, W, &rA0, C, &rA0, &info);// A has already been copied into W break; default: if (rA == cA) { //dgesv(&rA0, &cB0, W, &rA0, S, C, &rA0, &info);// A has already been copied into W dgesv(&rA0, &cB0, W, &rA0, (ptrdiff_t*)S, C, &rA0, &info);// A has already been copied into W } else{ for( i=0; i<cB; i++ ) for( j=0; j<rB; j++ ) C[i*rC0+j] = B[i*rB+j]; //dgelsy(&rA0, &cA0, &cB0, A, &rA0, C, &rC0, S, &rcond, &rank, W, &Lwork, &info); dgelsy(&rA0, &cA0, &cB0, A, &rA0, C, &rC0, (ptrdiff_t*)S, &rcond, (ptrdiff_t*)&rank, W, &Lwork, &info); } } #endif }
int bundle_qp_solve_mask (bundle_t *bundle, uint64_t mask) /* * Attempts to solve the penalized bundle QP assuming the zero/nonzero constraint multiplier combination specified in 'mask' is correct: * max z - 1/2t <x^T,x> * s.c * z - <A_i,x> <= b_i, i = 1,...,m : mul * z scalar, x n-dimensional vector * * This QP is convex, therefore solving it is equivalent to solving the KKT conditions system. * * For all possible values of mask, * if the i-th bit of mask is 1: * => z* - <A_i,x*> = b_i. * if the i-th bit of mask is 0: * => mul*_i = 0. * * Also, supposing the KKT conditions are verified, we have, with k iterating over all 1-bits of mask, * grad(z* - 1/2t <x^T,x*>) = sum over k of (mul*_k . grad(z* - <A_k,x*>)) * <=> [1 | -1/t x*] = sum over i of (mul*_k . [1 | -A_k]) * <=> t . sum over k of (mul*_k . A_k) = x* * and sum over k of mul*_k = 1. * * Therefore by substituting x* we obtain the system: * z* - <A_i, t . sum over k of (mul*_k . A_k)> = b_i, for all i with the i-th bit of 'mask' is 1 * <=> | -t(A'.A'^T) e | . | mul | = | b |, with e = (1, ..., 1) and dim(e) = number of bits of mask set to 1 * | e^T 0 | | z | | 1 |, and with A' = A with the rows corresponding to the bits set to 0 in mask removed. * * If the system is not linearly independent, then the optimal solution cannot be found with this mask. * Otherwise, having mul* and z*, we verify dual feasibility, for all i set to 1 in mask: * <=> mul_i* >= 0 * Also, we verify dual optimality: * <=> z* is minimal for all dual-feasible z* found with other masks * Finally, we verify primal feasibility, for all i set to 0 in mask: * <=> z* - <A_i,x*> <= b_i, and substituting x*, * <=> z* - t . sum over k of mul*_k <A_i, A_k^T> <= b_i * * If these hold then all KKT conditions are verified and z* is optimal. */ { double z; int info, isize; uint64_t bit; int i, j, k; // get system size (in bundle->kkt_m) and active constraint indices (in bundle->kkt_i[]) // set system rhs (in bundle->kkt_mul[]) for (bundle->kkt_m = i = 0, bit = 1; i < bundle->m; i++, bit <<= 1) if (mask & bit) { bundle->kkt_i[bundle->kkt_m] = i; bundle->kkt_mul[bundle->kkt_m] = bundle->b[i]; ++bundle->kkt_m; } bundle->kkt_mul[bundle->kkt_m] = 1.0; isize = 1 + bundle->kkt_m; // set matrix dimension size for LAPACK // set system lhs (in bundle->kkt_a[][]) for (i = 0; i < bundle->kkt_m; i++) { for (j = 0; j < bundle->kkt_m; j++) bundle->kkt_a[i * isize + j] = bundle->kkt_a[j * isize + i] = - (bundle->aat[bundle->kkt_i[i] * bundle->max_m + bundle->kkt_i[j]] / bundle->scale); bundle->kkt_a[i * isize + bundle->kkt_m] = 1.0; bundle->kkt_a[bundle->kkt_m * isize + i] = 1.0; } bundle->kkt_a[bundle->kkt_m * isize + bundle->kkt_m] = 0.0; // solve system using LAPACK, to retrieve mul* and z* (in bundle->kkt_mul[]) info = dgesv(isize, bundle->kkt_a, bundle->ipiv, bundle->kkt_mul); // check if system has full rank if (info > 0) return 0; // system is linearly independent and has a unique solution z = bundle->kkt_mul[bundle->kkt_m]; // check dual feasibility for (i = 0; i < bundle->kkt_m; i++) if (bundle->kkt_mul[i] < 0.0) return 0; // check dual optimality if (z > bundle->kkt_z * 1.01) return 0; else if (z < bundle->kkt_z) bundle->kkt_z = z; // check primal feasibility for (k = i = 0, bit = 1; i < bundle->m; i++, bit <<= 1) { if (mask & bit) { bundle->next_b[i] = z; } else { bundle->next_b[i] = 0.0; for (j = 0; j < bundle->kkt_m; j++) bundle->next_b[i] += bundle->kkt_mul[j] * bundle->aat[i * bundle->max_m + bundle->kkt_i[j]]; bundle->next_b[i] /= bundle->scale; bundle->next_b[i] += bundle->b[i]; if (z > bundle->next_b[i] + bundle->epsilon) return 0; } } // the KKT conditions are all verified return 1; }
/* Return information if the solution was computed */ ptrdiff_t Basis_Solve(PT_Basis pB, double *q, double *x, T_Options options) { /* Basis B */ /* I = W union (Z+m) -> ordered */ /* X = M_/W,Z <- invertible */ /* G = M_W,Z */ /* Bx_B = q -> x = [w_W;z_Z] */ /* z_Z = iX*q_/W */ /* w_W = q_W - G*x_/W */ ptrdiff_t i, incx=1, nb, m, diml, info; char T; double alpha=-1.0, beta=1.0; /* x(1:length(W)) = q(W) */ for(i=0;i<Index_Length(pB->pW);i++) x[i] = q[Index_Get(pB->pW, i)]; /* X = [] */ if(Index_Length(pB->pWc) == 0) return 0; /* z = q(B.Wc) */ for(i=0;i<Index_Length(pB->pWc);i++) pB->z[i] = q[Index_Get(pB->pWc, i)]; /* because the right hand side will be overwritten in dgesv, store it into temporary vector r */ dcopy(&(Index_Length(pB->pWc)), pB->z, &incx, pB->r, &incx); /* printf("x :\n"); */ /* Vector_Print_raw(pB->z,Index_Length(pB->pW)); */ /* printf("z :\n"); */ /* Vector_Print_raw(pB->z,Index_Length(pB->pWc)); */ /* x = [x1;x2] */ /* x2 = inv(X)*q(Wc) = inv(X)*z */ nb = 1; /* number of right hand side in A*x=b is 1 */ m = Matrix_Rows(pB->pF); /* Find solution to general problem A*x=b using LAPACK All the arguments have to be pointers. A and b will be altered on exit. */ switch (options.routine) { case 1: /* DGESV method implements LU factorization of A. */ dgesv(&m, &nb, pMAT(pB->pF), &((pB->pF)->rows_alloc), pB->p, pB->r, &m, &info); break; case 2: /* DGELS solves overdetermined or underdetermined real linear systems involving an M-by-N matrix A, or its transpose, using a QR or LQ factorization of A. It is assumed that A has full rank. */ T = 'n'; /* Not transposed */ diml = 2*m; dgels(&T, &m, &m, &nb, pMAT(pB->pF), &((pB->pF)->rows_alloc), pB->r, &m, pB->s, &diml, &info ); break; default : /* solve with factors F or U */ /* solve using LUMOD (no checks) */ /* LU_Solve0(pB->pLX, pB->pUX, pB->z, &(x[Index_Length(pB->pW)])); */ /* solve using LAPACK (contains also singularity checks) */ /* need to pass info as a pointer, otherwise weird numbers are assigned */ LU_Solve1(pB->pLX, pB->pUt, pB->z, &(x[Index_Length(pB->pW)]), &info); /* if something went wrong, refactorize and solve again */ if (info!=0) { /* printf("info=%ld, refactoring\n", info); */ /* Matrix_Print_row(pB->pLX); */ /* Matrix_Print_utril_row(pB->pUX); */ /* Matrix_Print_col(pB->pUt); */ LU_Refactorize(pB); /* if this fails again, then no minimum ratio is found -> exit with * numerical problems flag */ LU_Solve1(pB->pLX, pB->pUt, pB->z, &(x[Index_Length(pB->pW)]), &info); } } /* x1 = -G*x2 + q(W) */ /* y = alpha*G*x + beta*y */ /* alpha = -1.0; */ /* beta = 1.0; */ T = 'n'; /* Not transposed */ if (options.routine>0) { /* take LAPACK solution */ /* printf("lapack solution z:\n"); */ /* Vector_Print_raw(pB->r,Index_Length(pB->pWc)); */ /* matrix F after solution is factored in [L\U], we want the original format for the next call to dgesv, thus create a copy F <- X */ Matrix_Copy(pB->pX, pB->pF, pB->w); /* printf("x after lapack solution:\n"); */ /* Vector_Print_raw(x,Index_Length(pB->pW)+Index_Length(pB->pWc)); */ /* recompute the remaining variables according to basic solution */ dgemv(&T, &(Matrix_Rows(pB->pG)), &(Matrix_Cols(pB->pG)), &alpha, pMAT(pB->pG), &((pB->pG)->rows_alloc), pB->r, &incx, &beta, x, &incx); /* append the basic solution at the end of x vector */ dcopy(&(Index_Length(pB->pWc)), pB->r, &incx, &(x[Index_Length(pB->pW)]), &incx); } else { /* take LUmod solution */ dgemv(&T, &(Matrix_Rows(pB->pG)), &(Matrix_Cols(pB->pG)), &alpha, pMAT(pB->pG), &((pB->pG)->rows_alloc), &(x[Index_Length(pB->pW)]), &incx, &beta, x, &incx); } /* printf("y:\n"); */ /* Vector_Print_raw(x,Matrix_Rows(pB->pG)); */ return info; }
void PolynomialInterp::InterpolateCoeffs( int nPoints, const double * dX, const double * dY, double * dA, double dXmid, double * dWorkspace, int * iPivot ) { // Check if an external workspace is specified bool fExternalWorkspace = (dWorkspace == NULL)?(false):(true); if (!fExternalWorkspace) { dWorkspace = new double[nPoints*nPoints]; } // Check if an external pivot vector is specified bool fExternalPivot = (iPivot == NULL)?(false):(true); if (!fExternalPivot) { iPivot = new int[nPoints]; } // Construct the Vandermonde matrix int i; int j; int k = 0; for (j = 0; j < nPoints; j++) { dWorkspace[k] = 1.0; k++; } for (i = 1; i < nPoints; i++) { for (j = 0; j < nPoints; j++) { dWorkspace[k] = (dX[j] - dXmid) * dWorkspace[k-nPoints]; k++; } } // Initialize A memcpy(dA, dY, nPoints * sizeof(double)); // Store CLAPACK parameters int n = nPoints; int nRHS = 1; int nLDA = nPoints; int nLDB = nPoints; int nInfo; #ifdef TEMPEST_LAPACK_ACML_INTERFACE // Call the matrix solve dgesv(n, nRHS, dWorkspace, nLDA, iPivot, dA, nLDB, &nInfo); #endif #ifdef TEMPEST_LAPACK_ESSL_INTERFACE // Call the matrix solve dgesv(n, nRHS, dWorkspace, nLDA, iPivot, dA, nLDB, nInfo); #endif #ifdef TEMPEST_LAPACK_FORTRAN_INTERFACE // Call the matrix solve dgesv_(&n, &nRHS, dWorkspace, &nLDA, iPivot, dA, &nLDB, &nInfo); #endif // Delete workspace if (!fExternalWorkspace) { delete[] dWorkspace; } if (!fExternalPivot) { delete[] iPivot; } }
void dgWhat(Utype *What, Utype *Vhat, Utype *U, double beta){ extern void dgesv( int* n, int* nrhs, double* a, int* lda, int* ipiv, double* b, int* ldb, int* info ); int i; //initialization for iteration int j; //initialization for iteration int k; //initialization for iteration int po; //-------------------------------------------------------------------------- //Initialization for Lapack function DGESV_ //-------------------------------------------------------------------------- //see http://www.netlib.no/netlib/lapack/double/dgesv_.f for more information int n= 3*(What->basis.r+1); //Num of lin equ, order of the matrix A. N>=0 int nrhs = 1; //Num of rhs, i.e., num of columns of matrix B int lda=3*(What->basis.r+1); //Leading dimen of A. LDA >= max(1,N) int ldb = n; //Leading dimen of the array int info = -1; //Parameter that tells if DGESV_ operation was succesful int ipiv[3*(What->basis.r+1)]; //pivot indices define permutation matrix P //Initialize Residual, Jacobian, and w arrays double *R = (double* ) malloc ((What->basis.r+1)*3*sizeof(double)); double **dRkdU = (double **) malloc ((What->basis.r+1)*3*sizeof(double*)); double **dRdU = (double **) malloc ((What->basis.r+1)*3*sizeof(double*)); //Contains difference in states after the linear dG operation via DGESV_ double w[(What->basis.r+1)*3]; //Solution to the Raphson Newton system // Next section continues to finish initializing dRkdU, dRdU such that //the memory is all in one place and in a way that works for DGESV_ function dRdU[0] = (double *) malloc ((What->basis.r+1)*3*(What->basis.r+1)*3*sizeof(double)); for (i=1; i<3*(What->basis.r+1); i++){ dRdU[i]=dRdU[i-1]+3*(What->basis.r+1); } dRkdU[0]= (double*) malloc ((What->basis.r+1)*3*(What->basis.r+1)*3*sizeof(double)); for (i=1; i<3*(What->basis.r+1); i++){ dRkdU[i]=dRkdU[i-1]+3*(What->basis.r+1); } //==============================Implementation=--=========================== for (i=0; i<What->basis.r+1; i++){ //Make an initialial guess of x, y, z at r+1 points for t=0 window What->solution.x.array[(What->tSpan.size-2)*(What->basis.r+1)+i]=1.1; What->solution.y.array[(What->tSpan.size-2)*(What->basis.r+1)+i]=1.1; What->solution.z.array[(What->tSpan.size-2)*(What->basis.r+1)+i]=1.1; } for (i=What->tSpan.size-1; i-->0;){ //Iterate through all the time intervals //Starting from the last time interval, solving backwards for ( j=0; j<What->basis.r+1; j++){ What->timett.array[(What->basis.r+1)*i+j]= What->tSpan.array[i]-(xiL(What->basis.r, j)+1)/2.0*What->dt; } residualLgrng(U, Vhat, What, i, R, beta); //Calc residual 4 current t jacobianLgrng(U, Vhat, What, i, dRdU, dRkdU);//Calc jacobian 4 current t for (j=0; j<3*(What->basis.r+1); j++){ //Residual R to w and neg w[j]=-R[j]; } //Calculation of the update vectors; magic happens here dgesv(&n,&nrhs, dRdU[0], &lda, ipiv, w, &ldb, &info ); //w = update //Need to update the solution now for (j=0; j<(What->basis.r+1); j++){ //Iterate and add w to the solution What->solution.x.array[i*(What->basis.r+1)+j]+=w[j*3]; What->solution.y.array[i*(What->basis.r+1)+j]+=w[j*3+1]; What->solution.z.array[i*(What->basis.r+1)+j]+=w[j*3+2]; } //Need to update the solution now if (i>What->tSpan.size-2){ for (j=0; j<What->basis.r+1; j++){ What->solution.x.array[(i+1)*(What->basis.r+1)+j]= What->solution.x.array[i*(What->basis.r+1)+j]; What->solution.y.array[(i+1)*(What->basis.r+1)+j]= What->solution.y.array[i*(What->basis.r+1)+j]; What->solution.z.array[(i+1)*(What->basis.r+1)+j]= What->solution.z.array[i*(What->basis.r+1)+j]; } } } //Free everything! free(R); free(dRkdU[0]); free(dRkdU); free(dRdU[0]); free(dRdU); }
void dgVhat(Utype *Vhat, Utype *U){ //==============Initialization of Parameters and Variables================== extern void dgesv( int* n, int* nrhs, double* a, int* lda, int* ipiv, double* b, int* ldb, int* info ); int i; //initialization for iteration int j; //initialization for iteration int k; //initialization for iteration //-------------------------------------------------------------------------- //Initialization for Lapack function DGESV //-------------------------------------------------------------------------- //see http://www.netlib.no/netlib/lapack/double/dgesv.f for more information int n= 3*(Vhat->basis.r+1); //Num of lin equ, order of the matrix A. N>=0 int nrhs = 1; //Num of rhs, i.e., num of columns of matrix B int lda=3*(Vhat->basis.r+1); //Leading dimen of A. LDA >= max(1,N) int ldb = n; //Leading dimen of the array int info = -1; //Parameter that tells if DGESV operation was succesful int ipiv[3*(Vhat->basis.r+1)]; //pivot indices define permutation matrix P //Initialize Residual, Jacobian, and w arrays double *R = (double* ) malloc ((Vhat->basis.r+1)*3*sizeof(double)); double **dRkdU = (double **) malloc ((Vhat->basis.r+1)*3*sizeof(double*)); double **dRdU = (double **) malloc ((Vhat->basis.r+1)*3*sizeof(double*)); //Contains difference in states after the linear dG operation via DGESV double w[(Vhat->basis.r+1)*3]; //Solution to the Raphson Newton system // Next section continues to finish initializing dRkdU, dRdU such that //the memory is all in one place and in a way that works for DGESV function dRdU[0] = (double *) malloc ((Vhat->basis.r+1)*3*(Vhat->basis.r+1)*3*sizeof(double)); for (i=1; i<3*(Vhat->basis.r+1); i++){ dRdU[i]=dRdU[i-1]+3*(Vhat->basis.r+1); } dRkdU[0]= (double*) malloc ((Vhat->basis.r+1)*3*(Vhat->basis.r+1)*3*sizeof(double)); for (i=1; i<3*(Vhat->basis.r+1); i++){ dRkdU[i]=dRkdU[i-1]+3*(Vhat->basis.r+1); } //============================Implementation================================ for (i=0; i<Vhat->basis.r+1; i++){ Vhat->solution.x.array[i]=1.1; //Initial guess x at r+1 pts 4 1st window Vhat->solution.y.array[i]=1.1; //Initial guess y at r+1 pts 4 1st window Vhat->solution.z.array[i]=1.1; //Initial guess z at r+1 pts 4 1st window } for (i=0; i<Vhat->tSpan.size-1; i++){ //Iterate through all time intervals //find the time array that is associate with r+1 points for (j = 0; j<Vhat->basis.r+1; j++){ Vhat->timett.array[(Vhat->basis.r+1)*i+j]= Vhat->tSpan.array[i]+(xiL(Vhat->basis.r, j)+1)/2.0*Vhat->dt; } residualTan(U, Vhat, i, R); //Calculate residual for current time jacobianTan(U, Vhat, i, dRdU, dRkdU); //Calc jacobian for current t for (j=0; j<3*(Vhat->basis.r+1); j++){ //Residual R to w and neg w[j]=-R[j]; } //Calculation of the update vectors; magic happens here dgesv(&n,&nrhs, dRdU[0], &lda, ipiv, w, &ldb, &info );// w = update //Need to update the solution now for (j=0; j<(Vhat->basis.r+1); j++){ //Update initial guesses Vhat->solution.x.array[i*(Vhat->basis.r+1)+j]+=w[j*3]; Vhat->solution.y.array[i*(Vhat->basis.r+1)+j]+=w[j*3+1]; Vhat->solution.z.array[i*(Vhat->basis.r+1)+j]+=w[j*3+2]; } residualTan(U, Vhat, i, R); //Calculate residual for current time//This will be the guess for the next element // for (j=0; j<3*(Vhat->basis.r+1); j++){ //Residual R to w and neg // printf("%g\n", R[j]); // } if (i<Vhat->tSpan.size-2){ for (j=0; j<(Vhat->basis.r+1); j++){ Vhat->solution.x.array[(i+1)*(Vhat->basis.r+1)+j]= Vhat->solution.x.array[i*(Vhat->basis.r+1)+j]; Vhat->solution.y.array[(i+1)*(Vhat->basis.r+1)+j]= Vhat->solution.y.array[i*(Vhat->basis.r+1)+j]; Vhat->solution.z.array[(i+1)*(Vhat->basis.r+1)+j]= Vhat->solution.z.array[i*(Vhat->basis.r+1)+j]; } } } //Free everything! free(R); free(dRkdU[0]); free(dRkdU); free(dRdU[0]); free(dRdU); }
double nmf_neals(double * a, double * w0, double * h0, int * pm, int * pn, \ int * pk, int * maxiter, const double * pTolX, const double * pTolFun) { // code added to be able to call from R int m = * pm; int n = * pn; int k = * pk; const double TolX = * pTolX; const double TolFun = * pTolFun; // also: changed w0, h0 to simple pointer (instead of double) // // end code added #ifdef PROFILE_NMF_NEALS struct timeval start, end; gettimeofday(&start, 0); #endif #if DEBUG_LEVEL >= 2 printf("Entering nmf_neals\n"); #endif #ifdef ERROR_CHECKING errno = 0; #endif double * help1 = (double*) malloc(sizeof(double)*k*k); double * help2 = (double*) malloc(sizeof(double)*k*n); double * help3 = (double*) malloc(sizeof(double)*k*m); //----------------------------------------- // definition of necessary dynamic data structures //...for calculating matrix h double* h = (double*) malloc(sizeof(double)*k*n); int* jpvt_h = (int*) malloc(sizeof(int)*k); int info; //...for calculating matrix w double* w = (double*) malloc(sizeof(double)*m*k); //---------------- //...for calculating the norm of A-W*H double* d = (double*) malloc(sizeof(double)*m*n); //d = a - w*h double dnorm0 = 0; double dnorm = 0; const double eps = dlamch('E'); //machine precision epsilon const double sqrteps = sqrt(eps); //squareroot of epsilon //------------------- #ifdef ERROR_CHECKING if (errno) { perror("Error allocating memory in nmf_neals"); free(help1); free(help2); free(help3); free(h); free(jpvt_h); free(w); free(d); return -1; } #endif // declaration of data structures for switch to als algorithm // ---------------------------------------------------------- int als_data_allocated = 0; // indicates wheter data structures were already allocated // factor matrices for factorizing matrix w double * q; double * r; // factor matrices for factorizing matrix h double * q_h; double * r_h; double* tau_h; //stores elementary reflectors of factor matrix Q double* work_w; //work array for factorization of matrix w int lwork_w; double* work_h; //work array for factorization of matrix h int lwork_h; double * work_qta; //work array for dorgqr int lwork_qta; double * work_qth; //work array for dorgqr int lwork_qth; //query for optimal workspace size for routine dgeqp3... double querysize; //Loop-Indices int iter, i; //variable for storing if fallback happened in current iteration int fallback; // factorisation step in a loop from 1 to maxiter for (iter = 1; iter <= *maxiter; ++iter) { //no fallback in this iteration so far fallback = 0; // calculating matrix h //---------------- //help1 = w0'*w0 dgemm('T', 'N', k, k, m, 1.0, w0, m, w0, m, 0., help1, k); //help2 = w0'*a dgemm('T', 'N', k, n, m, 1.0, w0, m, a, m, 0., help2, k); //LU-Factorisation of help1 to solve equation help1 * x = help2 dgesv(k, n, help1, k, jpvt_h, help2, k, &info); // if factor matrix U is singular -> switch back to als algorithm to compute h if( info > 0) { //set fallback to 1 to indicate that fallback happened fallback = 1; // do dynamic data structures need to be allocated? if (!als_data_allocated) { als_data_allocated = 1; // factor matrices for factorizing matrix w q = (double*) malloc(sizeof(double)*m*k); r = (double*) malloc(sizeof(double)*m*k); // factor matrices for factorizing matrix h q_h = (double*) malloc(sizeof(double)*n*k); r_h = (double*) malloc(sizeof(double)*n*k); tau_h = (double*) malloc(sizeof(double)*k); //stores elementary reflectors of factor matrix Q //query for optimal workspace size for routine dgeqp3... //for matrix w dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info); lwork_w = (int) querysize; work_w = (double*) malloc(sizeof(double)*lwork_w); //work array for factorization of matrix help1 (dgeqp3) //for matrix h dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info); lwork_h = (int) querysize; work_h = (double*) malloc(sizeof(double)*lwork_h); //work array for factorization of matrix h //query for optimal workspace size for routine dorgqr... //for matrix w dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info); lwork_qta = (int)querysize; work_qta = (double*) malloc(sizeof(double)*lwork_qta); //work array for dorgqr //for matrix h dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info); lwork_qth = (int)querysize; work_qth = (double*) malloc(sizeof(double)*lwork_qth); } // calculating matrix h //---------------- //re-initialization //copy *w0 to q dlacpy('A', m, k, w0, m, q, m); //initialise jpvt_h to 0 -> every column free for (i = 0; i<k; ++i) jpvt_h[i] = 0; // Q-R factorization with column pivoting dgeqp3(m, k, q, m, jpvt_h, tau_h, work_w, lwork_w, &info); //copying upper triangular factor-matrix r out of q into r dlacpy('U', m, k, q, m, r, k); //Begin of least-squares-solution to w0 * x = a //generate explicit matrix q (m times k) and calculate q' * a dorgqr(m, k, k, q, m, tau_h, work_qta, lwork_qta, &info); dgemm('T', 'N', k, n, m, 1.0, q, m, a, m, 0.0, q_h, k); //solve R * x = (Q'*A) dtrtrs('U','N','N',k,n,r,k,q_h,k,&info); //copy matrix q to h, but permutated according to jpvt_h for (i=0; i<k; ++i) { dcopy(n, q_h + i, k, h + jpvt_h[i] - 1, k); } //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint for (i=0; i<k*n; ++i) { if (h[i] < ZERO_THRESHOLD) h[i] = 0.; } } else { //h = max(ZERO_THRESHOLD, help1\help2) for (i=0; i < k*n; ++i) h[i] = (help2[i] > ZERO_THRESHOLD ? help2[i] : 0.); } // calculating matrix w = max(0, help1\help3)' //---------------------------- //help1 = h*h' dgemm('N', 'T', k, k, n, 1.0, h, k, h, k, 0., help1, k); //help3 = h*a' dgemm('N', 'T', k, m, n, 1.0, h, k, a, m, 0., help3, k); //LU-Factorisation of help1 dgesv(k, m, help1, k, jpvt_h, help3, k, &info); // if( info > 0) { // do dynamic data structures need to be allocated? if (!als_data_allocated) { als_data_allocated = 1; // factor matrices for factorizing matrix w q = (double*) malloc(sizeof(double)*m*k); r = (double*) malloc(sizeof(double)*m*k); // factor matrices for factorizing matrix h q_h = (double*) malloc(sizeof(double)*n*k); r_h = (double*) malloc(sizeof(double)*n*k); tau_h = (double*) malloc(sizeof(double)*k); //stores elementary reflectors of factor matrix Q //query for optimal workspace size for routine dgeqp3... //for matrix w dgeqp3(m, k, q, m, jpvt_h, tau_h, &querysize, -1, &info); lwork_w = (int) querysize; work_w = (double*) malloc(sizeof(double)*lwork_w); //work array for factorization of matrix help1 (dgeqp3) //..for matrix h dgeqp3(n, k, q_h, n, jpvt_h, tau_h, &querysize, -1, &info); lwork_h = (int) querysize; work_h = (double*) malloc(sizeof(double)*lwork_h); //work array for factorization of matrix h //query for optimal workspace size for routine dorgqr... //for matrix w dorgqr(m, k, k, q, m, tau_h, &querysize, -1, &info); lwork_qta = (int)querysize; work_qta = (double*) malloc(sizeof(double)*lwork_qta); //work array for dorgqr // ... for matrix h dorgqr(n, k, k, q_h, n, tau_h, &querysize, -1, &info); lwork_qth = (int)querysize; work_qth = (double*) malloc(sizeof(double)*lwork_qth); } //calculating matrix w //copy original matrix h to q_h, but transposed for (i=0; i<k; ++i) { dcopy(n, h + i, k, q_h + i*n, 1); } //initialise jpvt_a to 0 -> every column free for (i = 0; i<k; ++i) jpvt_h[i] = 0; //Q-R factorization dgeqp3(n, k, q_h, n, jpvt_h, tau_h, work_h, lwork_h, &info); //copying upper triangular factor-matrix r_h out of q into r_h dlacpy('U', n, k, q_h, n, r_h, k); //Begin of least-squares-solution to w0 * x = a //generate explicit matrix q (n times k) and calculate *a = q' * a' dorgqr(n, k, k, q_h, n, tau_h, work_qth, lwork_qth, &info); dgemm('T', 'T', k, m, n, 1.0, q_h, n, a, m, 0.0, q, k); //solve R_h * x = (Q'*A') dtrtrs('U', 'N', 'N', k, m, r_h, k, q, k, &info); //jpvt_h*(R\(Q'*A')) permutation and transposed copy to w for (i=0; i<k; ++i) { dcopy(m, q + i, k, w + m * (jpvt_h[i] - 1), 1); } //transform negative and very small positive values to zero for performance reasons and to keep the non-negativity constraint for (i=0; i<k*m; ++i) { if (w[i] < ZERO_THRESHOLD) w[i] = 0.; } } else { //w = max(0, help3)' for (i=0; i<k; ++i) { dcopy(m, help3 + i, k, w + i*m, 1); } for (i=0; i<m*k; ++i) { if (w[i] < ZERO_THRESHOLD) w[i] = 0.; } } // calculating the norm of D = A-W*H dnorm = calculateNorm(a, w, h, d, m, n, k); // calculating change in w -> dw //---------------------------------- double dw; dw = calculateMaxchange(w, w0, m, k, sqrteps); // calculating change in h -> dh //----------------------------------- double dh; dh = calculateMaxchange(h, h0, k, n, sqrteps); //Max-Change = max(dh, dw) = delta double delta; delta = (dh > dw) ? dh : dw; // storing the matrix results of the current iteration swap(&w0, &w); swap(&h0, &h); // storing the norm results of the current iteration dnorm0 = dnorm; #if DEBUG_LEVEL >= 1 printf("iter: %.6d\t dnorm: %.16f\t delta: %.16f\n", iter, dnorm, delta); #endif //Check for Convergence if (iter > 1) { if (delta < TolX) { *maxiter = iter; break; } else if (dnorm <= TolFun*dnorm0) { *maxiter = iter; break; } } } //end of loop from 1 to maxiter #if DEBUG_LEVEL >= 2 printf("Exiting nmf_neals\n"); #endif #ifdef PROFILE_NMF_NEALS gettimeofday(&end, 0); outputTiming("", start, end); #endif // freeing memory if used free(help1); free(help2); free(help3); free(h); free(jpvt_h); free(w); free(d); if(als_data_allocated) { free(q); free(r); free(q_h); free(r_h); free(work_h); free(work_w); free(tau_h); free(work_qta); free(work_qth); } // returning calculated norm return dnorm; }