Beispiel #1
0
void OpenMPBLAS3::trmm(
    const enum CBLAS_ORDER order,
    const enum CBLAS_SIDE Side,
    const enum CBLAS_UPLO Uplo,
    const enum CBLAS_TRANSPOSE TransA,
    const enum CBLAS_DIAG Diag,
    const IndexType m,
    const IndexType n,
    const double alpha,
    const double* A,
    const IndexType lda,
    double* B,
    const IndexType ldb,
    SyncToken* syncToken )
{
    if ( syncToken )
    {
        LAMA_LOG_WARN( logger, "no asynchronous execution for openmp possible at this level." )
    }

    char UL, TA, SD, DI;
#ifdef F77_CHAR
    F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI;
#else
#define F77_TA &TA
#define F77_UL &UL
#define F77_SD &SD
#define F77_DI &DI
#endif
#ifdef F77_INT
    F77_INT F77_M = m, F77_N = n, F77_lda = lda, F77_ldb = ldb;
#else
#define F77_M m
#define F77_N n
#define F77_lda lda
#define F77_ldb ldb
#endif
    IndexType RowMajorStrg;
    RowMajorStrg = 0;

    if ( order == CblasColMajor )
    {
        if ( Side == CblasRight )
        {
            SD = 'R';
        }
        else if ( Side == CblasLeft )
        {
            SD = 'L';
        }
        else
        {
            BLASHelper::XERBLA_cpu( RowMajorStrg, 2, "cblas_dtrmm", "Illegal Side setting, %d\n", Side );
            RowMajorStrg = 0;
            return;
        }

        if ( Uplo == CblasUpper )
        {
            UL = 'U';
        }
        else if ( Uplo == CblasLower )
        {
            UL = 'L';
        }
        else
        {
            BLASHelper::XERBLA_cpu( RowMajorStrg, 3, "cblas_dtrmm", "Illegal Uplo setting, %d\n", Uplo );
            RowMajorStrg = 0;
            return;
        }

        if ( TransA == CblasTrans )
        {
            TA = 'T';
        }
        else if ( TransA == CblasConjTrans )
        {
            TA = 'C';
        }
        else if ( TransA == CblasNoTrans )
        {
            TA = 'N';
        }
        else
        {
            BLASHelper::XERBLA_cpu( RowMajorStrg, 4, "cblas_dtrmm", "Illegal Trans setting, %d\n", TransA );
            RowMajorStrg = 0;
            return;
        }

        if ( Diag == CblasUnit )
        {
            DI = 'U';
        }
        else if ( Diag == CblasNonUnit )
        {
            DI = 'N';
        }
        else
        {
            BLASHelper::XERBLA_cpu( RowMajorStrg, 5, "cblas_dtrmm", "Illegal Diag setting, %d\n", Diag );
            RowMajorStrg = 0;
            return;
        }

#ifdef F77_CHAR
        F77_UL = C2F_CHAR( &UL );
        F77_TA = C2F_CHAR( &TA );
        F77_SD = C2F_CHAR( &SD );
        F77_DI = C2F_CHAR( &DI );
#endif
        F77_dtrmm( F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb );
    }
    else if ( order == CblasRowMajor )
    {
        RowMajorStrg = 1;

        if ( Side == CblasRight )
        {
            SD = 'L';
        }
        else if ( Side == CblasLeft )
        {
            SD = 'R';
        }
        else
        {
            BLASHelper::XERBLA_cpu( RowMajorStrg, 2, "cblas_dtrmm", "Illegal Side setting, %d\n", Side );
            RowMajorStrg = 0;
            return;
        }

        if ( Uplo == CblasUpper )
        {
            UL = 'L';
        }
        else if ( Uplo == CblasLower )
        {
            UL = 'U';
        }
        else
        {
            BLASHelper::XERBLA_cpu( RowMajorStrg, 3, "cblas_dtrmm", "Illegal Uplo setting, %d\n", Uplo );
            RowMajorStrg = 0;
            return;
        }

        if ( TransA == CblasTrans )
        {
            TA = 'T';
        }
        else if ( TransA == CblasConjTrans )
        {
            TA = 'C';
        }
        else if ( TransA == CblasNoTrans )
        {
            TA = 'N';
        }
        else
        {
            BLASHelper::XERBLA_cpu( RowMajorStrg, 4, "cblas_dtrmm", "Illegal Trans setting, %d\n", TransA );
            RowMajorStrg = 0;
            return;
        }

        if ( Diag == CblasUnit )
        {
            DI = 'U';
        }
        else if ( Diag == CblasNonUnit )
        {
            DI = 'N';
        }
        else
        {
            BLASHelper::XERBLA_cpu( RowMajorStrg, 5, "cblas_dtrmm", "Illegal Diag setting, %d\n", Diag );
            RowMajorStrg = 0;
            return;
        }

#ifdef F77_CHAR
        F77_UL = C2F_CHAR( &UL );
        F77_TA = C2F_CHAR( &TA );
        F77_SD = C2F_CHAR( &SD );
        F77_DI = C2F_CHAR( &DI );
#endif
        F77_dtrmm( F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb );
    }
    else
    {
        BLASHelper::XERBLA_cpu( RowMajorStrg, 1, "cblas_dtrmm", "Illegal order setting, %d\n", order );
    }

    RowMajorStrg = 0;
    return;
}
Beispiel #2
0
void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
                 const enum CBLAS_UPLO Uplo, const  enum CBLAS_TRANSPOSE TransA,
                 const enum CBLAS_DIAG Diag, const CBLAS_INT_TYPE M, const CBLAS_INT_TYPE N,
                 const double alpha, const double  *A, const CBLAS_INT_TYPE lda,
                 double  *B, const CBLAS_INT_TYPE ldb)
{
   char UL, TA, SD, DI;   
#ifdef F77_CHAR
   F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI;
#else
   #define F77_TA &TA  
   #define F77_UL &UL  
   #define F77_SD &SD
   #define F77_DI &DI
#endif

#ifdef F77_INT
   F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
#else
   #define F77_M M
   #define F77_N N
   #define F77_lda lda
   #define F77_ldb ldb
#endif


   if( Order == CblasColMajor )
   {
      if( Side == CblasRight) SD='R';
      else if ( Side == CblasLeft ) SD='L';
      else 
      {
         cblas_xerbla(2, "cblas_dtrmm","Illegal Side setting, %d\n", Side);
         return;
      }
      if( Uplo == CblasUpper) UL='U';
      else if ( Uplo == CblasLower ) UL='L';
      else 
      {
         cblas_xerbla(3, "cblas_dtrmm","Illegal Uplo setting, %d\n", Uplo);
         return;
      }

      if( TransA == CblasTrans) TA ='T';
      else if ( TransA == CblasConjTrans ) TA='C';
      else if ( TransA == CblasNoTrans )   TA='N';
      else 
      {
         cblas_xerbla(4, "cblas_dtrmm","Illegal Trans setting, %d\n", TransA);
         return;
      }

      if( Diag == CblasUnit ) DI='U';
      else if ( Diag == CblasNonUnit ) DI='N';
      else 
      {
         cblas_xerbla(5, "cblas_dtrmm","Illegal Diag setting, %d\n", Diag);
         return;
      }

      #ifdef F77_CHAR
         F77_UL = C2F_CHAR(&UL);
         F77_TA = C2F_CHAR(&TA);
         F77_SD = C2F_CHAR(&SD);
         F77_DI = C2F_CHAR(&DI);
      #endif

      F77_dtrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb);
   } else if (Order == CblasRowMajor)
   {
      if( Side == CblasRight) SD='L';
      else if ( Side == CblasLeft ) SD='R';
      else 
      {
         cblas_xerbla(2, "cblas_dtrmm","Illegal Side setting, %d\n", Side);
         return;
      }

      if( Uplo == CblasUpper) UL='L';
      else if ( Uplo == CblasLower ) UL='U';
      else 
      {
         cblas_xerbla(3, "cblas_dtrmm","Illegal Uplo setting, %d\n", Uplo);
         return;
      }

      if( TransA == CblasTrans) TA ='T';
      else if ( TransA == CblasConjTrans ) TA='C';
      else if ( TransA == CblasNoTrans )   TA='N';
      else 
      {
         cblas_xerbla(4, "cblas_dtrmm","Illegal Trans setting, %d\n", TransA);
         return;
      }

      if( Diag == CblasUnit ) DI='U';
      else if ( Diag == CblasNonUnit ) DI='N';
      else 
      {
         cblas_xerbla(5, "cblas_dtrmm","Illegal Diag setting, %d\n", Diag);
         return;
      }

      #ifdef F77_CHAR
         F77_UL = C2F_CHAR(&UL);
         F77_TA = C2F_CHAR(&TA);
         F77_SD = C2F_CHAR(&SD);
         F77_DI = C2F_CHAR(&DI);
      #endif
      F77_dtrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb);
   } 
   else cblas_xerbla(1, "cblas_dtrmm", "Illegal Order setting, %d\n", Order);
   return;
}
Beispiel #3
0
/**
 * Do the least squares using QR decomposition
 */
double accel_lsq_qr(double** A, double* data, double* oparam, int ndata, int nparam, double** Ocvm){
    int nhrs=1;
    int nwork = -1;
    int info=0;
    int i,j;
    double iwork;

    // workspace query - works out optimal size for work array
    F77_dgels("T", &nparam, &ndata, &nhrs, A[0], &nparam, data, &ndata, &iwork, &nwork, &info);
    nwork=(int)iwork;
    logdbg("nwork = %d (%lf)",nwork,iwork);
    double* work = static_cast<double*>(malloc(sizeof(double)*nwork));
    logdbg("accel_lsq_qr ndata=%d nparam=%d",ndata,nparam);

    // as usual we prentend that the matrix is transposed to deal with C->fortran conversion
    // degls does a least-squares using QR decomposition. Fast and robust.
    F77_dgels("T", &nparam, &ndata, &nhrs, A[0], &nparam, data, &ndata, work, &nwork, &info);

    free(work);
    // if info is not zero then the fit failed.
    if(info!=0){
        logerr("Error in lapack DEGLS. INFO=%d See full logs for explanation.",info);
        logmsg("");
        logmsg("From: http://www.netlib.org/lapack/explore-html/d8/dde/dgels_8f.html");
        logmsg("INFO is INTEGER");
        logmsg("  = 0:  successful exit");
        logmsg("  < 0:  if INFO = -i, the i-th argument had an illegal value");
        logmsg("  > 0:  if INFO =  i, the i-th diagonal element of the");
        logmsg("        triangular factor of A is zero, so that A does not have");
        logmsg("        full rank; the least squares solution could not be");
        logmsg("        computed.");
        logmsg("");
        if(info > 0){
            logerr("It appears that you are fitting for a 'bad' parameter - E.g A jump on a non-existant flag.");
            logmsg(" TEMPO2 will NOT attempt to deal with this!");
            logmsg("Cannot continue. Abort fit.");
            return -1;
        } else {
            logmsg("Cannot continue. Abort fit.");
            return -1;
        }
    }
    assert(info==0);

    if(oparam!=NULL){
        // copy out the output parameters, which are written into the "data" array.
        memcpy(oparam,data,sizeof(double)*nparam);
    }
    double chisq=0;
    for( i = nparam; i < ndata; i++ ) chisq += data[i] * data[i];
    if (Ocvm != NULL){

        int n=nparam;


        // packed triangular matrix.
        double* _t=(double*)malloc(sizeof(double)*(n*(n+1))/2);

        // This code taken from the LAPACK documentation
        // to pack a triangular matrix.

        // we want the upper triangular matrix part of A.
        //
        // pack upper triangle like (r,c)
        // (1,1) (1,2) (2,2)
        //  i+(2n-j)(j-1)/2
        int jc=0;
        for (j=0;j<n;j++){ // cols
            for (i=0; i <=j; i++) { // rows
                _t[jc] = A[i][j]; // A came from fortran, so is in [col][row] ordering
                // BUT - we have transposed A, so we have to un-transpose it
                ++jc;
            }
        }

        logdbg("Inverting...");
        F77_dtptri("U","N",&n,_t,&i);
        if(i!=0){
            logerr("Error in lapack DTPTRI. INFO=%d",i);
            logmsg("From: http://www.netlib.org/lapack/explore-html/d8/d05/dtptri_8f.html");
            logmsg("INFO is INTEGER");
            logmsg("  = 0:  successful exit");
            logmsg("  < 0:  if INFO = -i, the i-th argument had an illegal value");
            logmsg("  > 0:  if INFO = i, A(i,i) is exactly zero.  The triangular");
            logmsg("  matrix is singular and its inverse can not be computed.");
            logmsg("Cannot continue - abort fit");
            return -1;
        }

        double **Rinv = malloc_uinv(n);

        for (j=0;j<n;j++){ // cols
            for (i=0;i<n;i++){ //rows
                Ocvm[i][j]=0;
            }
        }
        // Unpack the triangular matrix using reverse of above
        // We will put it in fortran, so continue to use [col][row] order
        jc=0;
        for (j=0;j<n;j++){ // cols
            for (i=0; i <=j; i++) { // rows
                Rinv[j][i] = _t[jc];
                Ocvm[j][i] = _t[jc];
                ++jc;
            }
        }

        free(_t);

        double a=chisq/(double)(ndata-nparam);
        // (X^T X)^-1 = Rinv.Rinv^T gives parameter covariance matrix
        // Note that Ocvm is input and output
        // and that covar matrix will be transposed, but it is
        // symetric so it doesn't matter!
        F77_dtrmm(  "R",  "U",    "T",  "N", &n, &n,    &a, *Rinv,  &n, *Ocvm, &n);
        // DTRMM ( SIDE, UPLO, TRANSA, DIAG,  M,  N, ALPHA,  A   , LDA,     B, LDB )

        if(debugFlag){
            for(i=0;i<n;i++){
                for(j=0;j<n;j++){
                    logdbg("COVAR %d %d %lg",i,j,Ocvm[i][j]);
                }
            }
        }

        free_uinv(Rinv);
    }


    return chisq;
}