/*
 * Solves A x = B for a single right-hand side, where L carries the
 * Cholesky factor of A (A = L L^T, lower triangle selected via 'L').
 *
 * L.nrows is used as the system order and as both leading dimensions.
 * B.v is handed to POTRS as the right-hand side; with standard LAPACK
 * ?potrs semantics it is overwritten by the solution (POTRS itself is
 * defined elsewhere -- confirm).
 *
 * Returns the info code reported by POTRS.
 */
int SOLVE_CHOL_ONE(MATRIX_T L, VECTOR_T B)
{
    char triangle = 'L';
    int order = (int)L.nrows;
    int rhsCount = (int)1;
    int status;

    POTRS(&triangle, &order, &rhsCount, L.m, &order, B.v, &order, &status);

    return status;
}
/*
 * Solves A X = B for B.ncols right-hand sides at once, where L carries
 * the Cholesky factor of A (A = L L^T, lower triangle selected via 'L').
 *
 * L.nrows is used as the system order and as the leading dimension of
 * both L.m and B.m. With standard LAPACK ?potrs semantics B.m is
 * overwritten by the solutions (POTRS itself is defined elsewhere --
 * confirm).
 *
 * Returns the info code reported by POTRS.
 */
int SOLVE_CHOL_MULTIPLE(MATRIX_T L, MATRIX_T B)
{
    char triangle = 'L';
    int order = (int)L.nrows;
    int rhsCount = (int)B.ncols;
    int status;

    POTRS(&triangle, &order, &rhsCount, L.m, &order, B.m, &order, &status);

    return status;
}
//--------------------------------------------------------- DVec& chol_solve(const DMat& ch, const DVec& b) //--------------------------------------------------------- { // Solves a linear system using Cholesky-factored // symmetric positive-definite matrix, A = U^T U. if (FACT_CHOL != ch.get_factmode()) {umERROR("chol_solve(ch,b)", "matrix is not factored.");} int M=ch.num_rows(), lda=ch.num_rows(); int nrhs=1, ldb=b.size(); assert(ldb == M); char uplo = 'U'; int info=0; double* ch_data = const_cast<double*>(ch.data()); // copy RHS into x, then overwrite x with solution DVec* x = new DVec(b, OBJ_temp); POTRS (uplo, M, nrhs, ch_data, lda, x->data(), ldb, info); if (info) { umERROR("chol_solve(ch,b)", "dpotrs reports: info = %d", info); } return (*x); }
//--------------------------------------------------------- bool chol_solve(const DMat& ch, const DMat& B, DMat& X) //--------------------------------------------------------- { // Solve a set of linear systems using Cholesky-factored // symmetric positive-definite matrix, A = U^T U. if (FACT_CHOL != ch.get_factmode()) {umERROR("chol_solve(ch,B,X)", "matrix is not factored.");} int M =ch.num_rows(), lda=ch.num_rows(); int ldb=B.num_rows(), nrhs=B.num_cols(); assert(ldb == M); char uplo = 'U'; int info=0; double* ch_data = const_cast<double*>(ch.data()); X = B; // overwrite X with RHS's, then solutions POTRS (uplo, M, nrhs, ch_data, lda, X.data(), ldb, info); if (info) { umERROR("chol_solve(ch,B,X)", "dpotrs reports: info = %d", info); } return true; }
/*
 * Benchmark driver for POTRF and, optionally, POTRS or POTRI.
 *
 * Command line (all optional, positional): from, to, step -- the range of
 * matrix orders m that is swept. Defaults are 1, 200 and 1; "to" is clamped
 * so that it is never smaller than "from".
 *
 * Environment:
 *   OPENBLAS_UPLO  if it starts with 'L', the lower triangle is used
 *                  (otherwise the upper one).
 *   OPENBLAS_TEST  first character is stored in btest; 'S' additionally
 *                  times POTRS, 'I' additionally times POTRI.
 *
 * One result line per m is written to stderr. The process exits with
 * status 1 on allocation failure or on a non-zero LAPACK info.
 */
int main(int argc, char *argv[]){

#ifndef COMPLEX
  char *trans[] = {"T", "N"};
#else
  char *trans[] = {"C", "N"};
#endif
  char *uplo[] = {"U", "L"};
  FLOAT alpha[] = {1.0, 0.0};
  FLOAT beta [] = {0.0, 0.0};

  FLOAT *a, *b;

  char *p;
  char btest = 'F';

  blasint m, i, j, info, uplos=0;
  double flops;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1;

  /* skip the program name, then consume up to three positional arguments */
  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* uplos indexes both uplo[] and trans[]: 0 -> "U", 1 -> "L" */
  if ((p = getenv("OPENBLAS_UPLO"))) if (*p == 'L') uplos=1;

  if ((p = getenv("OPENBLAS_TEST"))) btest=*p;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]);

  /* both work arrays are sized once for the largest order "to" */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  for(m = from; m <= to; m += step){

    /*
     * Fill a (leading dimension m) as a triangular matrix: random entries
     * in [-0.5, 0.5] in the selected triangle, a diagonal in [8, 9] and
     * zeros in the other triangle.
     */
#ifndef COMPLEX
    if (uplos & 1) {
      /* lower triangular */
      for (j = 0; j < m; j++) {
        for(i = 0; i < j; i++) a[i + j * m] = 0.;
        a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
        for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
      }
    } else {
      /* upper triangular */
      for (j = 0; j < m; j++) {
        for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
        a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
        for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
      }
    }
#else
    /* complex case: interleaved (re, im) pairs; the diagonal is kept real */
    if (uplos & 1) {
      for (j = 0; j < m; j++) {
        for(i = 0; i < j; i++) {
          a[(i + j * m) * 2 + 0] = 0.;
          a[(i + j * m) * 2 + 1] = 0.;
        }

        a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
        a[(j + j * m) * 2 + 1] = 0.;

        for(i = j + 1; i < m; i++) {
          a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
          a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
        }
      }
    } else {
      for (j = 0; j < m; j++) {
        for(i = 0; i < j; i++) {
          a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
          a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
        }

        a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
        a[(j + j * m) * 2 + 1] = 0.;

        for(i = j + 1; i < m; i++) {
          a[(i + j * m) * 2 + 0] = 0.;
          a[(i + j * m) * 2 + 1] = 0.;
        }
      }
    }
#endif

    /* b := rank-m update built from the triangular a; this is the input to POTRF */
    SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);

    /* timed section 1: Cholesky factorization of b (in place) */
    gettimeofday( &start, (struct timezone *)0);

    POTRF(uplo[uplos], &m, b, &m, &info);

    gettimeofday( &stop, (struct timezone *)0);

    if (info != 0) {
      fprintf(stderr, "Potrf info = %d\n", info);
      exit(1);
    }

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
    flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;

    /*
     * Optional timed section 2. When one of the tests below runs, time1 and
     * flops are overwritten, so the printed figures then describe that test
     * and not the POTRF call above.
     */
    if ( btest == 'S' )
    {
      /* refill the whole of a with random values to serve as right-hand sides */
      for(j = 0; j < to; j++){
        for(i = 0; i < to * COMPSIZE; i++){
          a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        }
      }

      gettimeofday( &start, (struct timezone *)0);

      /* solve with m right-hand sides using the factor held in b */
      POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);

      gettimeofday( &stop, (struct timezone *)0);

      if (info != 0) {
        fprintf(stderr, "Potrs info = %d\n", info);
        exit(1);
      }
      time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
      flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
    }

    if ( btest == 'I' )
    {
      gettimeofday( &start, (struct timezone *)0);

      /* inverse computed from the factor held in b */
      POTRI(uplo[uplos], &m, b, &m, &info);

      gettimeofday( &stop, (struct timezone *)0);

      if (info != 0) {
        fprintf(stderr, "Potri info = %d\n", info);
        exit(1);
      }

      time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
      flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
    }

    fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",m,flops ,time1,btest);

  }

  /* NOTE(review): a and b are not freed before returning. */
  return 0;
}
/*
 * This function returns the solution of Ax=b
 *
 * The function assumes that A is symmetric & positive definite and employs
 * the Cholesky decomposition:
 * If A=L L^T with L lower triangular, the system to be solved becomes
 * (L L^T) x = b
 * This amounts to solving L y = b for y and then L^T x = y for x
 *
 * A is mxm, b is mx1. A and B are not modified: A is copied into a work
 * buffer and B is copied into x before the LAPACK calls.
 *
 * The function returns 0 in case of error, 1 if successful.
 * More precisely: 0 is returned when POTRF reports a positive info (matrix
 * not positive definite); an illegal-argument report (negative info) from
 * POTRF or POTRS, or a failed malloc, terminates the process via exit(1).
 *
 * This function is often called repetitively to solve problems of identical
 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
 * retained between calls and free'd-malloc'ed when not of the appropriate size.
 * A call with NULL as the first argument forces this memory to be released.
 * (The retention only happens when LINSOLVERS_RETAIN_MEMORY is defined;
 * otherwise the buffer is allocated and freed on every call and a NULL
 * first argument is a no-op returning 1.)
 */
int AX_EQ_B_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
{
__STATIC__ LM_REAL *buf=NULL;
__STATIC__ int buf_sz=0;

LM_REAL *a;
int a_sz, tot_sz;
int info, nrhs=1;

    /* NULL A: cleanup request, no system is solved */
    if(!A)
#ifdef LINSOLVERS_RETAIN_MEMORY
    {
      if(buf) free(buf);
      buf=NULL;
      buf_sz=0;

      return 1;
    }
#else
      return 1; /* NOP */
#endif /* LINSOLVERS_RETAIN_MEMORY */

    /* calculate required memory size */
    a_sz=m*m;
    tot_sz=a_sz;

#ifdef LINSOLVERS_RETAIN_MEMORY
    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
      if(buf) free(buf); /* free previously allocated memory */

      buf_sz=tot_sz;
      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
      if(!buf){
        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
        exit(1);
      }
    }
#else
      /* no retention: a fresh buffer per call, released on every return path below */
      buf_sz=tot_sz;
      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
      if(!buf){
        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
        exit(1);
      }
#endif /* LINSOLVERS_RETAIN_MEMORY */

  a=buf;

  /* store A into a and B into x. A is assumed symmetric,
   * hence no transposition is needed
   */
  memcpy(a, A, a_sz*sizeof(LM_REAL));
  memcpy(x, B, m*sizeof(LM_REAL));

  /* Cholesky decomposition of A */
  //POTF2("L", (int *)&m, a, (int *)&m, (int *)&info);
  POTRF("L", (int *)&m, a, (int *)&m, (int *)&info);
  /* error treatment */
  if(info!=0){
    if(info<0){
      /* illegal argument: treated as fatal */
      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", -info);
      exit(1);
    }
    else{
      /* A is not positive definite: report and let the caller decide */
      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", info);
#ifndef LINSOLVERS_RETAIN_MEMORY
      free(buf);
#endif

      return 0;
    }
  }

  /* solve using the computed Cholesky in one lapack call */
  POTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
  /* only a negative info is examined here; a positive value is not treated */
  if(info<0){
    fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTRS) " in ", AX_EQ_B_CHOL) "()\n", -info);
    exit(1);
  }

#if 0
  /* disabled alternative to the POTRS call above: two explicit triangular solves */

  /* alternative: solve the linear system L y = b ... */
  TRTRS("L", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
  /* error treatment */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_CHOL) "()\n", -info);
      exit(1);
    }
    else{
      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info);
#ifndef LINSOLVERS_RETAIN_MEMORY
      free(buf);
#endif

      return 0;
    }
  }

  /* ... solve the linear system L^T x = y */
  TRTRS("L", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
  /* error treatment */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) "in ", AX_EQ_B_CHOL) "()\n", -info);
      exit(1);
    }
    else{
      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info);
#ifndef LINSOLVERS_RETAIN_MEMORY
      free(buf);
#endif

      return 0;
    }
  }
#endif /* 0 */

#ifndef LINSOLVERS_RETAIN_MEMORY
  free(buf);
#endif

  return 1;
}
/*
 * Dual-based basis (dictionary) update.
 *
 * Inputs, with shapes as implied by the BLAS/LAPACK leading dimensions used
 * below (all arrays column-major):
 *   X        N x numSamples data matrix.
 *   S        K x numSamples coefficient matrix.
 *   Dorig    optional N x K initial basis; may be NULL.
 *            NOTE(review): when non-NULL it is passed to GELSY as the system
 *            matrix, which by LAPACK convention overwrites it with its
 *            factorization -- callers must not rely on Dorig afterwards.
 *   l2norm   norm bound; its square is passed to minimize_dual as c.
 *   length   forwarded unchanged to minimize_dual.
 * Output:
 *   Dopt     receives transpose(SXt, ., K, N) of the Cholesky solve result
 *            (presumably the N x K updated basis).
 *
 * Steps:
 *   1. SSt = S S^T, XSt = X S^T, SXt = transpose of XSt.
 *   2. Initial dual variable: random in [0, 10] when Dorig is NULL, otherwise
 *      diag(Dorig \ XSt - SSt) with the least-squares solve done by GELSY.
 *   3. minimize_dual computes the optimal dual variable.
 *   4. Solve (SSt + diag(lambda)) Z = SXt by POTRF/POTRS and transpose into
 *      Dopt.
 *
 * The INFO codes returned by GELSY, POTRF and POTRS are not examined.
 */
void l2ls_learn_basis_dual(DOUBLE *Dopt, DOUBLE *Dorig, DOUBLE *X, DOUBLE *S, DOUBLE l2norm, INT length, INT N, INT K, INT numSamples) {

	/* SSt = S * S^T (K x K); only the 'U' triangle is requested from SYRK. */
	DOUBLE *SSt = (DOUBLE *) MALLOC(K * K * sizeof(DOUBLE));

	CHAR uplo = 'U';
	CHAR trans = 'N';
	INT SYRKN = K;
	INT SYRKK = numSamples;
	DOUBLE alpha = 1;
	INT SYRKLDA = K;
	DOUBLE beta = 0;
	INT SYRKLDC = K;

	SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, S, &SYRKLDA, &beta, SSt, &SYRKLDC);

	/* XSt = X * S^T (N x K, leading dimension N). */
	DOUBLE *XSt = (DOUBLE *) MALLOC(N * K * sizeof(DOUBLE));

	CHAR transa = 'N';
	CHAR transb = 'T';
	INT GEMMM = N;
	INT GEMMN = K;
	INT GEMMK = numSamples;
	alpha = 1;
	INT GEMMLDA = N;
	INT GEMMLDB = K;
	beta = 0;
	INT GEMMLDC = N;

	GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, X, &GEMMLDA, S, &GEMMLDB, &beta, XSt, &GEMMLDC);

	/* SXt = XSt^T (K x N). */
	DOUBLE *SXt = (DOUBLE *) MALLOC(N * K * sizeof(DOUBLE));
	transpose(XSt, SXt, N, K);

	INT iterK;
	DOUBLE *dualLambdaOrig = (DOUBLE *) MALLOC(K * sizeof(DOUBLE));
	if (Dorig == NULL) {
		/* No initial basis: start from random dual variables in [0, 10]. */
		srand(time(NULL));
		for (iterK = 0; iterK < K; ++iterK) {
			dualLambdaOrig[iterK] = 10 * (DOUBLE) rand() / (DOUBLE) RAND_MAX;
		}
	} else {
		/*
		 * dualLambdaOrig = diag(Dorig \ XSt - SSt).
		 *
		 * GELSY needs the right-hand side in an array with leading dimension
		 * >= max(N, K): on entry it holds the N x K matrix XSt, on exit the
		 * K x K solution. B is that work copy, so XSt itself stays intact.
		 */
		INT maxNK = IMAX(N, K);
		DOUBLE *B = (DOUBLE *) MALLOC(maxNK * maxNK * sizeof(DOUBLE));
		for (iterK = 0; iterK < K; ++iterK) {
			/* Each column of XSt has N entries (was copying only K of them). */
			datacpy(&B[iterK * maxNK], &XSt[iterK * N], N);
		}

		INT GELSYM = N;
		INT GELSYN = K;
		INT GELSYNRHS = K;
		INT GELSYLDA = N;
		INT GELSYLDB = maxNK;

		/*
		 * jpvt is an input as well as an output of GELSY: a non-zero entry
		 * pins that column to the front. Zero marks every column as free.
		 */
		INT *jpvt = (INT *) MALLOC(K * sizeof(INT));
		for (iterK = 0; iterK < K; ++iterK) {
			jpvt[iterK] = 0;
		}

		/*
		 * rcond is an input of GELSY (rank-determination threshold) and was
		 * previously passed uninitialized.
		 * NOTE(review): the value below is a small, conservative threshold;
		 * confirm it against the conditioning expected for Dorig.
		 */
		DOUBLE rcond = (DOUBLE) 1e-10;
		INT rank;
		INT lwork = -1;
		DOUBLE work_temp;
		DOUBLE *work;
		INT INFO;

		/* Workspace query (lwork == -1): only work_temp is written. */
		GELSY(&GELSYM, &GELSYN, &GELSYNRHS, Dorig, &GELSYLDA, B, &GELSYLDB, jpvt, &rcond, &rank, &work_temp, &lwork, &INFO);

		lwork = (INT) work_temp;
		work = (DOUBLE*) MALLOC(lwork * sizeof(DOUBLE));

		/*
		 * Actual solve. The right-hand side must be B (leading dimension
		 * maxNK); passing XSt here, as before, both left B unsolved and
		 * addressed XSt with a leading dimension it does not have.
		 */
		GELSY(&GELSYM, &GELSYN, &GELSYNRHS, Dorig, &GELSYLDA, B, &GELSYLDB, jpvt, &rcond, &rank, work, &lwork, &INFO);

		for (iterK = 0; iterK < K; ++iterK) {
			/*
			 * Diagonal of the K x K solution stored with stride maxNK, minus
			 * the diagonal of SSt. The destination index is iterK; the
			 * previous code wrote every value to dualLambdaOrig[K], one past
			 * the end of the allocation.
			 */
			dualLambdaOrig[iterK] = B[iterK * maxNK + iterK] - SSt[iterK * K + iterK];
		}

		FREE(work);
		FREE(B);
		FREE(jpvt);
	}

	/* SXtXSt = SXt * SXt^T (K x K, 'U' triangle). */
	DOUBLE *SXtXSt = (DOUBLE *) MALLOC(K * K * sizeof(DOUBLE));

	uplo = 'U';
	trans = 'N';
	SYRKN = K;
	SYRKK = N;
	alpha = 1;
	SYRKLDA = K;
	beta = 0;
	SYRKLDC = K;

	SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, SXt, &SYRKLDA, &beta, SXtXSt, &SYRKLDC);

	DOUBLE c = SQR(l2norm);

	/* trXXt = squared Frobenius norm of X. */
	CHAR norm = 'F';
	INT LANGEM = N;
	INT LANGEN = numSamples;
	INT LANGELDA = N;

	DOUBLE trXXt = LANGE(&norm, &LANGEM, &LANGEN, X, &LANGELDA, NULL);
	trXXt = SQR(trXXt);

	/* XSt is no longer needed; its storage is reused for the K dual variables. */
	DOUBLE *dualLambdaOpt = XSt;

	minimize_dual(dualLambdaOpt, dualLambdaOrig, length, SSt, SXt, SXtXSt, trXXt, c, N, K);

	/* SSt := SSt + diag(dualLambdaOpt). */
	for (iterK = 0; iterK < K; ++iterK) {
		SSt[iterK * K + iterK] += dualLambdaOpt[iterK];
	}

	/* Factor the shifted SSt and solve for the K x N right-hand side SXt in place. */
	uplo = 'U';
	INT POTRSN = K;
	INT POTRSLDA = K;
	INT INFO;

	POTRF(&uplo, &POTRSN, SSt, &POTRSLDA, &INFO);

	INT POTRSNRHS = N;
	INT POTRSLDB = K;

	POTRS(&uplo, &POTRSN, &POTRSNRHS, SSt, &POTRSLDA, SXt, &POTRSLDB, &INFO);

	transpose(SXt, Dopt, K, N);

	FREE(SSt);
	FREE(XSt);
	FREE(SXt);
	FREE(dualLambdaOrig);
	FREE(SXtXSt);
}
void dual_obj_grad(DOUBLE *obj, DOUBLE *deriv, DOUBLE *dualLambda, DOUBLE *SSt, DOUBLE *SXt, DOUBLE *SXtXSt, DOUBLE trXXt, \ DOUBLE c, INT N, INT K, INT derivFlag, DOUBLE *SStLambda, DOUBLE *tempMatrix) { INT maxNK = IMAX(N, K); INT SStLambdaFlag = 0; if (SStLambda == NULL) { SStLambda = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE)); SStLambdaFlag = 1; } INT tempMatrixFlag = 0; if (tempMatrix == NULL) { tempMatrix = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE)); tempMatrixFlag = 1; } datacpy(SStLambda, SSt, K * K); INT iterK; /* #pragma omp parallel for private(iterK) shared(SStLambda, dualLambda, K) */ for (iterK = 0; iterK < K; ++iterK) { SStLambda[iterK * K + iterK] += dualLambda[iterK]; } CHAR uplo = 'U'; INT POTRSN = K; INT POTRSLDA = K; INT INFO; POTRF(&uplo, &POTRSN, SStLambda, &POTRSLDA, &INFO); datacpy(tempMatrix, SXtXSt, K * K); INT POTRSNRHS = K; INT POTRSLDB = K; POTRS(&uplo, &POTRSN, &POTRSNRHS, SStLambda, &POTRSLDA, tempMatrix, &POTRSLDB, &INFO); DOUBLE objTemp = 0; /* #pragma omp parallel for private(iterK) shared(tempMatrix, K) reduction(-: objTemp) */ for (iterK = 0; iterK < K; ++iterK) { objTemp = objTemp - tempMatrix[iterK * K + iterK]; } INT ASUMN = K; INT incx = 1; DOUBLE sumDualLambda = ASUM(&ASUMN, dualLambda, &incx); objTemp += trXXt - c * sumDualLambda; *obj = - objTemp; if (derivFlag == 1) { datacpy(tempMatrix, SXt, K * N); POTRSNRHS = N; POTRSLDB = K; POTRS(&uplo, &POTRSN, &POTRSNRHS, SStLambda, &POTRSLDA, tempMatrix, &POTRSLDB, &INFO); transpose(tempMatrix, SStLambda, K, N); INT NRM2N = N; DOUBLE tempNorm; #pragma omp parallel for private(iterK, tempNorm) shared(SStLambda, deriv, K, c) for (iterK = 0; iterK < K; ++iterK) { tempNorm = NRM2(&NRM2N, &SStLambda[iterK * N], &incx); deriv[iterK] = - SQR(tempNorm) + c; } } if (SStLambdaFlag == 1) { FREE(SStLambda); } if (tempMatrixFlag == 1) { FREE(tempMatrix); } }
//============================================================================= int Epetra_SerialSpdDenseSolver::Solve(void) { int ierr = 0; // We will call one of four routines depending on what services the user wants and // whether or not the matrix has been inverted or factored already. // // If the matrix has been inverted, use DGEMM to compute solution. // Otherwise, if the user want the matrix to be equilibrated or wants a refined solution, we will // call the X interface. // Otherwise, if the matrix is already factored we will call the TRS interface. // Otherwise, if the matrix is unfactored we will call the SV interface. if (Equilibrate_) { ierr = Epetra_SerialDenseSolver::EquilibrateRHS(); B_Equilibrated_ = true; } EPETRA_CHK_ERR(ierr); if (A_Equilibrated_ && !B_Equilibrated_) EPETRA_CHK_ERR(-1); // Matrix and vectors must be similarly scaled if (!A_Equilibrated_ && B_Equilibrated_) EPETRA_CHK_ERR(-2); if (B_==0) EPETRA_CHK_ERR(-3); // No B if (X_==0) EPETRA_CHK_ERR(-4); // No B if (ShouldEquilibrate() && !A_Equilibrated_) ierr = 1; // Warn that the system should be equilibrated. double DN = N_; double DNRHS = NRHS_; if (Inverted()) { if (B_==X_) EPETRA_CHK_ERR(-100); // B and X must be different for this case GEMM('N', 'N', N_, NRHS_, N_, 1.0, AF_, LDAF_, B_, LDB_, 0.0, X_, LDX_); if (INFO_!=0) EPETRA_CHK_ERR(INFO_); UpdateFlops(2.0*DN*DN*DNRHS); Solved_ = true; } else { if (!Factored()) Factor(); // Matrix must be factored if (B_!=X_) { *LHS_ = *RHS_; // Copy B to X if needed X_ = LHS_->A(); LDX_ = LHS_->LDA(); } POTRS(SymMatrix_->UPLO(), N_, NRHS_, AF_, LDAF_, X_, LDX_, &INFO_); if (INFO_!=0) EPETRA_CHK_ERR(INFO_); UpdateFlops(2.0*DN*DN*DNRHS); Solved_ = true; } int ierr1=0; if (RefineSolution_) ierr1 = ApplyRefinement(); if (ierr1!=0) { EPETRA_CHK_ERR(ierr1); } else { EPETRA_CHK_ERR(ierr); } if (Equilibrate_) ierr1 = Epetra_SerialDenseSolver::UnequilibrateLHS(); EPETRA_CHK_ERR(ierr1); return(0); }