/* * This function returns the solution of Ax=b * * The function assumes that A is symmetric & postive definite and employs * the Cholesky decomposition: * If A=L L^T with L lower triangular, the system to be solved becomes * (L L^T) x = b * This amounts to solving L y = b for y and then L^T x = y for x * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; LM_REAL *a; int a_sz, tot_sz; int info, nrhs=1; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ a_sz=m*m; tot_sz=a_sz; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; /* store A into a and B into x. A is assumed symmetric, * hence no transposition is needed */ memcpy(a, A, a_sz*sizeof(LM_REAL)); memcpy(x, B, m*sizeof(LM_REAL)); /* Cholesky decomposition of A */ //POTF2("L", (int *)&m, a, (int *)&m, (int *)&info); POTRF("L", (int *)&m, a, (int *)&m, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* solve using the computed Cholesky in one lapack call */ POTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTRS) " in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } #if 0 /* alternative: solve the linear system L y = b ... */ TRTRS("L", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* ... solve the linear system L^T x = y */ TRTRS("L", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) "in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } #endif /* 0 */ #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }
/* * This function returns the solution of min_x ||Ax - b|| * * || . || is the second order (i.e. L2) norm. This is a least squares technique that * is based on QR decomposition: * If A=Q R with Q orthogonal and R upper triangular, the normal equations become * (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b. * This amounts to solving R^T y = A^T b for y and then R x = y for x * Note that Q does not need to be explicitly computed * * A is mxn, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_QRLS(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m, int n) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; static int nb=0; /* no __STATIC__ decl. here! */ LM_REAL *a, *tau, *r, *work; int a_sz, tau_sz, r_sz, tot_sz; register int i, j; int info, worksz, nrhs=1; register LM_REAL sum; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ if(m<n){ fprintf(stderr, RCAT("Normal equations require that the number of rows is greater than number of columns in ", AX_EQ_B_QRLS) "() [%d x %d]! -- try transposing\n", m, n); exit(1); } /* calculate required memory size */ a_sz=m*n; tau_sz=n; r_sz=n*n; if(!nb){ LM_REAL tmp; worksz=-1; // workspace query; optimal size is returned in tmp GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb } worksz=nb*m; tot_sz=a_sz + tau_sz + r_sz + worksz; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; tau=a+a_sz; r=tau+tau_sz; work=r+r_sz; /* store A (column major!) into a */ for(i=0; i<m; i++) for(j=0; j<n; j++) a[i+j*m]=A[i*n+j]; /* compute A^T b in x */ for(i=0; i<n; i++){ for(j=0, sum=0.0; j<m; j++) sum+=A[j*n+i]*B[j]; x[i]=sum; } /* QR decomposition of A */ GEQRF((int *)&m, (int *)&n, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* R is stored in the upper triangular part of a. Note that a is mxn while r nxn */ for(j=0; j<n; j++){ for(i=0; i<=j; i++) r[i+j*n]=a[i+j*m]; /* lower part is zero */ for(i=j+1; i<n; i++) r[i+j*n]=0.0; } /* solve the linear system R^T y = A^t b */ TRTRS("U", "T", "N", (int *)&n, (int *)&nrhs, r, (int *)&n, x, (int *)&n, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* solve the linear system R x = y */ TRTRS("U", "N", "N", (int *)&n, (int *)&nrhs, r, (int *)&n, x, (int *)&n, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }
/* * This function returns the solution of Ax = b * * The function is based on QR decomposition with explicit computation of Q: * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes * Q R x = b or R x = Q^T b. * The last equation can be solved directly. * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; static int nb=0; /* no __STATIC__ decl. here! */ LM_REAL *a, *tau, *r, *work; int a_sz, tau_sz, r_sz, tot_sz; register int i, j; int info, worksz, nrhs=1; register LM_REAL sum; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ a_sz=m*m; tau_sz=m; r_sz=m*m; /* only the upper triangular part really needed */ if(!nb){ LM_REAL tmp; worksz=-1; // workspace query; optimal size is returned in tmp GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb } worksz=nb*m; tot_sz=a_sz + tau_sz + r_sz + worksz; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; tau=a+a_sz; r=tau+tau_sz; work=r+r_sz; /* store A (column major!) into a */ for(i=0; i<m; i++) for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; /* QR decomposition of A */ GEQRF((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QR) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QR) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* R is stored in the upper triangular part of a; copy it in r so that ORGQR() below won't destroy it */ memcpy(r, a, r_sz*sizeof(LM_REAL)); /* compute Q using the elementary reflectors computed by the above decomposition */ ORGQR((int *)&m, (int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", ORGQR) " in ", AX_EQ_B_QR) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("Unknown LAPACK error (%d) in ", AX_EQ_B_QR) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* Q is now in a; compute Q^T b in x */ for(i=0; i<m; i++){ for(j=0, sum=0.0; j<m; j++) sum+=a[i*m+j]*B[j]; x[i]=sum; } /* solve the linear system R x = Q^t b */ TRTRS("U", "N", "N", (int *)&m, (int *)&nrhs, r, (int *)&m, x, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QR) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QR) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }
/* * This function returns the solution of Ax=b * * The function assumes that A is symmetric & postive definite and employs * the Cholesky decomposition: * If A=U^T U with U upper triangular, the system to be solved becomes * (U^T U) x = b * This amount to solving U^T y = b for y and then U x = y for x * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { LM_REAL stackBuf[16384]; const int stackBuf_sz = 16384; __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; LM_REAL *a, *b; int a_sz, b_sz, tot_sz; register int i, j; int info, nrhs=1; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ a_sz=m*m; b_sz=m; tot_sz=a_sz + b_sz; if(tot_sz <= stackBuf_sz) { a=stackBuf; } else { #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; } b=a+a_sz; /* store A (column major!) into a anb B into b */ for(i=0; i<m; i++){ for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; b[i]=B[i]; } /* Cholesky decomposition of A */ POTF2("U", (int *)&m, a, (int *)&m, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTF2) " in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ //fprintf(stderr, RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ", POTF2) " in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY if(buf) free(buf); /* free previously allocated memory */ #endif return 0; } } /* solve the linear system U^T y = b */ TRTRS("U", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, b, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY if(buf) free(buf); /* free previously allocated memory */ #endif return 0; } } /* solve the linear system U x = y */ TRTRS("U", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, b, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) "in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ //fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY if(buf) free(buf); /* free previously allocated memory */ #endif return 0; } } /* copy the result in x */ for(i=0; i<m; i++) x[i]=b[i]; #ifndef LINSOLVERS_RETAIN_MEMORY if(buf) free(buf); /* free previously allocated memory */ #endif return 1; }
/* * This function returns the solution of Ax = b * * The function is based on QR decomposition with explicit computation of Q: * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes * Q R x = b or R x = Q^T b. * The last equation can be solved directly. * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; LM_REAL *a, *qtb, *tau, *r, *work; int a_sz, qtb_sz, tau_sz, r_sz, tot_sz; register int i, j; blasint info, worksz, nrhs=1; register LM_REAL sum; #ifdef LINSOLVERS_RETAIN_MEMORY if(!A){ if(buf) free(buf); buf_sz=0; return 1; } #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ a_sz=m*m; qtb_sz=m; tau_sz=m; r_sz=m*m; /* only the upper triangular part really needed */ worksz=3*m; /* this is probably too much */ tot_sz=a_sz + qtb_sz + tau_sz + r_sz + worksz; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; qtb=a+a_sz; tau=qtb+qtb_sz; r=tau+tau_sz; work=r+r_sz; /* store A (column major!) into a */ for(i=0; i<m; i++) for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; /* QR decomposition of A */ const blasint mm = m; GEQRF((blasint *)&mm, (blasint *)&mm, a, (blasint *)&mm, tau, work, (blasint *)&worksz, (blasint *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %ld of ", GEQRF) " in ", AX_EQ_B_QR) "()\n", (long)-info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %ld for ", GEQRF) " in ", AX_EQ_B_QR) "()\n", (long)info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* R is stored in the upper triangular part of a; copy it in r so that ORGQR() below won't destroy it */ for(i=0; i<r_sz; i++) r[i]=a[i]; /* compute Q using the elementary reflectors computed by the above decomposition */ ORGQR((blasint *)&mm, (blasint *)&mm, (blasint *)&mm, a, (blasint *)&mm, tau, work, (blasint *)&worksz, (blasint *)&info); if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %ld of ", ORGQR) " in ", AX_EQ_B_QR) "()\n", (long)-info); exit(1); } else{ fprintf(stderr, RCAT("Unknown LAPACK error (%ld) in ", AX_EQ_B_QR) "()\n", (long)info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* Q is now in a; compute Q^T b in qtb */ for(i=0; i<m; i++){ for(j=0, sum=0.0; j<m; j++) sum+=a[i*m+j]*B[j]; qtb[i]=sum; } /* solve the linear system R x = Q^t b */ TRTRS("U", "N", "N", (blasint *)&mm, (blasint *)&nrhs, r, (blasint *)&mm, qtb, (blasint *)&mm, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %ld of ", TRTRS) " in ", AX_EQ_B_QR) "()\n", (long)-info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %ld-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QR) "()\n", (long)info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* copy the result in x */ for(i=0; i<m; i++) x[i]=qtb[i]; #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }
/* * This function returns the solution of min_x ||Ax - b|| * * || . || is the second order (i.e. L2) norm. This is a least squares technique that * is based on QR decomposition: * If A=Q R with Q orthogonal and R upper triangular, the normal equations become * (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b. * This amounts to solving R^T y = A^T b for y and then R x = y for x * Note that Q does not need to be explicitly computed * * A is mxn, b is mx1 * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_QRLS(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m, int n) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; LM_REAL *a, *atb, *tau, *r, *work; int a_sz, atb_sz, tau_sz, r_sz, tot_sz; register int i, j; blasint info, worksz, nrhs=1; register LM_REAL sum; #ifdef LINSOLVERS_RETAIN_MEMORY if(!A){ if(buf) free(buf); buf_sz=0; return 1; } #endif /* LINSOLVERS_RETAIN_MEMORY */ if(m<n){ fprintf(stderr, RCAT("Normal equations require that the number of rows is greater than number of columns in ", AX_EQ_B_QRLS) "() [%d x %d]! -- try transposing\n", m, n); exit(1); } /* calculate required memory size */ a_sz=m*n; atb_sz=n; tau_sz=n; r_sz=n*n; worksz=3*n; /* this is probably too much */ tot_sz=a_sz + atb_sz + tau_sz + r_sz + worksz; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; atb=a+a_sz; tau=atb+atb_sz; r=tau+tau_sz; work=r+r_sz; /* store A (column major!) into a */ for(i=0; i<m; i++) for(j=0; j<n; j++) a[i+j*m]=A[i*n+j]; /* compute A^T b in atb */ for(i=0; i<n; i++){ for(j=0, sum=0.0; j<m; j++) sum+=A[j*n+i]*B[j]; atb[i]=sum; } const blasint mm = m; const blasint nn = n; /* QR decomposition of A */ GEQRF((blasint *)&mm, (blasint *)&nn, a, (blasint *)&mm, tau, work, (blasint *)&worksz, (blasint *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %ld of ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", (long)-info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %ld for ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", (long)info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* R is stored in the upper triangular part of a. Note that a is mxn while r nxn */ for(j=0; j<n; j++){ for(i=0; i<=j; i++) r[i+j*n]=a[i+j*m]; /* lower part is zero */ for(i=j+1; i<n; i++) r[i+j*n]=0.0; } /* solve the linear system R^T y = A^t b */ TRTRS("U", "T", "N", (blasint *)&nn, (blasint *)&nrhs, r, (blasint *)&nn, atb, (blasint *)&nn, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %ld of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", (long)-info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %ld-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", (long)info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* solve the linear system R x = y */ TRTRS("U", "N", "N", (blasint *)&nn, (blasint *)&nrhs, r, (blasint *)&nn, atb, (blasint *)&nn, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %ld of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", (long)-info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %ld-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", (long)info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* copy the result in x */ for(i=0; i<n; i++) x[i]=atb[i]; #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; }