void OpenMPBLAS3::trmm( const enum CBLAS_ORDER order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const IndexType m, const IndexType n, const double alpha, const double* A, const IndexType lda, double* B, const IndexType ldb, SyncToken* syncToken ) { if ( syncToken ) { LAMA_LOG_WARN( logger, "no asynchronous execution for openmp possible at this level." ) } char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M = m, F77_N = n, F77_lda = lda, F77_ldb = ldb; #else #define F77_M m #define F77_N n #define F77_lda lda #define F77_ldb ldb #endif IndexType RowMajorStrg; RowMajorStrg = 0; if ( order == CblasColMajor ) { if ( Side == CblasRight ) { SD = 'R'; } else if ( Side == CblasLeft ) { SD = 'L'; } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 2, "cblas_dtrmm", "Illegal Side setting, %d\n", Side ); RowMajorStrg = 0; return; } if ( Uplo == CblasUpper ) { UL = 'U'; } else if ( Uplo == CblasLower ) { UL = 'L'; } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 3, "cblas_dtrmm", "Illegal Uplo setting, %d\n", Uplo ); RowMajorStrg = 0; return; } if ( TransA == CblasTrans ) { TA = 'T'; } else if ( TransA == CblasConjTrans ) { TA = 'C'; } else if ( TransA == CblasNoTrans ) { TA = 'N'; } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 4, "cblas_dtrmm", "Illegal Trans setting, %d\n", TransA ); RowMajorStrg = 0; return; } if ( Diag == CblasUnit ) { DI = 'U'; } else if ( Diag == CblasNonUnit ) { DI = 'N'; } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 5, "cblas_dtrmm", "Illegal Diag setting, %d\n", Diag ); RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR( &UL ); F77_TA = C2F_CHAR( &TA ); F77_SD = C2F_CHAR( &SD ); F77_DI = C2F_CHAR( &DI ); #endif F77_dtrmm( F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb ); } else if ( order == CblasRowMajor ) { RowMajorStrg = 1; if ( Side == CblasRight ) { SD = 'L'; } else if ( Side == CblasLeft ) { SD = 'R'; } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 2, "cblas_dtrmm", "Illegal Side setting, %d\n", Side ); RowMajorStrg = 0; return; } if ( Uplo == CblasUpper ) { UL = 'L'; } else if ( Uplo == CblasLower ) { UL = 'U'; } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 3, "cblas_dtrmm", "Illegal Uplo setting, %d\n", Uplo ); RowMajorStrg = 0; return; } if ( TransA == CblasTrans ) { TA = 'T'; } else if ( TransA == CblasConjTrans ) { TA = 'C'; } else if ( TransA == CblasNoTrans ) { TA = 'N'; } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 4, "cblas_dtrmm", "Illegal Trans setting, %d\n", TransA ); RowMajorStrg = 0; return; } if ( Diag == CblasUnit ) { DI = 'U'; } else if ( Diag == CblasNonUnit ) { DI = 'N'; } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 5, "cblas_dtrmm", "Illegal Diag setting, %d\n", Diag ); RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR( &UL ); F77_TA = C2F_CHAR( &TA ); F77_SD = C2F_CHAR( &SD ); F77_DI = C2F_CHAR( &DI ); #endif F77_dtrmm( F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb ); } else { BLASHelper::XERBLA_cpu( RowMajorStrg, 1, "cblas_dtrmm", "Illegal order setting, %d\n", order ); } RowMajorStrg = 0; return; }
void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const CBLAS_INT_TYPE M, const CBLAS_INT_TYPE N, const double alpha, const double *A, const CBLAS_INT_TYPE lda, double *B, const CBLAS_INT_TYPE ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #endif if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_dtrmm","Illegal Side setting, %d\n", Side); return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_dtrmm","Illegal Uplo setting, %d\n", Uplo); return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_dtrmm","Illegal Trans setting, %d\n", TransA); return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_dtrmm","Illegal Diag setting, %d\n", Diag); return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_dtrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb); } else if (Order == CblasRowMajor) { if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_dtrmm","Illegal Side setting, %d\n", Side); return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_dtrmm","Illegal Uplo setting, %d\n", Uplo); return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_dtrmm","Illegal Trans setting, %d\n", TransA); return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_dtrmm","Illegal Diag setting, %d\n", Diag); return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_dtrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb); } else cblas_xerbla(1, "cblas_dtrmm", "Illegal Order setting, %d\n", Order); return; }
/** * Do the least squares using QR decomposition */ double accel_lsq_qr(double** A, double* data, double* oparam, int ndata, int nparam, double** Ocvm){ int nhrs=1; int nwork = -1; int info=0; int i,j; double iwork; // workspace query - works out optimal size for work array F77_dgels("T", &nparam, &ndata, &nhrs, A[0], &nparam, data, &ndata, &iwork, &nwork, &info); nwork=(int)iwork; logdbg("nwork = %d (%lf)",nwork,iwork); double* work = static_cast<double*>(malloc(sizeof(double)*nwork)); logdbg("accel_lsq_qr ndata=%d nparam=%d",ndata,nparam); // as usual we prentend that the matrix is transposed to deal with C->fortran conversion // degls does a least-squares using QR decomposition. Fast and robust. F77_dgels("T", &nparam, &ndata, &nhrs, A[0], &nparam, data, &ndata, work, &nwork, &info); free(work); // if info is not zero then the fit failed. if(info!=0){ logerr("Error in lapack DEGLS. INFO=%d See full logs for explanation.",info); logmsg(""); logmsg("From: http://www.netlib.org/lapack/explore-html/d8/dde/dgels_8f.html"); logmsg("INFO is INTEGER"); logmsg(" = 0: successful exit"); logmsg(" < 0: if INFO = -i, the i-th argument had an illegal value"); logmsg(" > 0: if INFO = i, the i-th diagonal element of the"); logmsg(" triangular factor of A is zero, so that A does not have"); logmsg(" full rank; the least squares solution could not be"); logmsg(" computed."); logmsg(""); if(info > 0){ logerr("It appears that you are fitting for a 'bad' parameter - E.g A jump on a non-existant flag."); logmsg(" TEMPO2 will NOT attempt to deal with this!"); logmsg("Cannot continue. Abort fit."); return -1; } else { logmsg("Cannot continue. Abort fit."); return -1; } } assert(info==0); if(oparam!=NULL){ // copy out the output parameters, which are written into the "data" array. memcpy(oparam,data,sizeof(double)*nparam); } double chisq=0; for( i = nparam; i < ndata; i++ ) chisq += data[i] * data[i]; if (Ocvm != NULL){ int n=nparam; // packed triangular matrix. double* _t=(double*)malloc(sizeof(double)*(n*(n+1))/2); // This code taken from the LAPACK documentation // to pack a triangular matrix. // we want the upper triangular matrix part of A. // // pack upper triangle like (r,c) // (1,1) (1,2) (2,2) // i+(2n-j)(j-1)/2 int jc=0; for (j=0;j<n;j++){ // cols for (i=0; i <=j; i++) { // rows _t[jc] = A[i][j]; // A came from fortran, so is in [col][row] ordering // BUT - we have transposed A, so we have to un-transpose it ++jc; } } logdbg("Inverting..."); F77_dtptri("U","N",&n,_t,&i); if(i!=0){ logerr("Error in lapack DTPTRI. INFO=%d",i); logmsg("From: http://www.netlib.org/lapack/explore-html/d8/d05/dtptri_8f.html"); logmsg("INFO is INTEGER"); logmsg(" = 0: successful exit"); logmsg(" < 0: if INFO = -i, the i-th argument had an illegal value"); logmsg(" > 0: if INFO = i, A(i,i) is exactly zero. The triangular"); logmsg(" matrix is singular and its inverse can not be computed."); logmsg("Cannot continue - abort fit"); return -1; } double **Rinv = malloc_uinv(n); for (j=0;j<n;j++){ // cols for (i=0;i<n;i++){ //rows Ocvm[i][j]=0; } } // Unpack the triangular matrix using reverse of above // We will put it in fortran, so continue to use [col][row] order jc=0; for (j=0;j<n;j++){ // cols for (i=0; i <=j; i++) { // rows Rinv[j][i] = _t[jc]; Ocvm[j][i] = _t[jc]; ++jc; } } free(_t); double a=chisq/(double)(ndata-nparam); // (X^T X)^-1 = Rinv.Rinv^T gives parameter covariance matrix // Note that Ocvm is input and output // and that covar matrix will be transposed, but it is // symetric so it doesn't matter! F77_dtrmm( "R", "U", "T", "N", &n, &n, &a, *Rinv, &n, *Ocvm, &n); // DTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A , LDA, B, LDB ) if(debugFlag){ for(i=0;i<n;i++){ for(j=0;j<n;j++){ logdbg("COVAR %d %d %lg",i,j,Ocvm[i][j]); } } } free_uinv(Rinv); } return chisq; }