C++ (Cpp) cblas_zgemvの例

プログラミング言語: C++ (Cpp)

メソッド/関数: cblas_zgemv

hotexamples.comのコード掲載数: 13

C++ (Cpp) cblas_zgemv - 13件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のcblas_zgemvの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: cblas2_overloads.hpp プロジェクト: bpedret/Sassena-GPU

 inline 
 void gemv (CBLAS_ORDER const Order, 
            CBLAS_TRANSPOSE const TransA, int const M, int const N,
            traits::complex_d const& alpha, 
            traits::complex_d const* A, int const lda,
            traits::complex_d const* X, int const incX, 
            traits::complex_d const& beta, 
            traits::complex_d* Y, int const incY) 
 {
   cblas_zgemv (Order, TransA, M, N, 
                static_cast<void const*> (&alpha), 
                static_cast<void const*> (A), lda, 
                static_cast<void const*> (X), incX,
                static_cast<void const*> (&beta), 
                static_cast<void*> (Y), incY); 
 }

コード例 #2

ファイルを表示

ファイル: qwrapper_zgemv.c プロジェクト: gpichon/eigenproblems

void CORE_zgemv_quark(Quark *quark)
{
    PLASMA_enum trans;
    int m, n, lda, incx, incy;
    PLASMA_Complex64_t alpha, beta;
    const PLASMA_Complex64_t *A, *x;
    PLASMA_Complex64_t *y;

    quark_unpack_args_11( quark, trans, m, n, alpha, A, lda, x, incx, beta, y, incy );
    cblas_zgemv(
        CblasColMajor,
        (CBLAS_TRANSPOSE)trans,
        m, n,
        CBLAS_SADDR(alpha), A, lda,
                            x, incx,
        CBLAS_SADDR(beta),  y, incy);
}

コード例 #3

ファイルを表示

ファイル: gemv.hpp プロジェクト: aydindemircioglu/RcppShark

inline void gemv(CBLAS_ORDER const Order,
        CBLAS_TRANSPOSE const TransA, int const M, int const N,
         double alpha,
        std::complex<double> const *A, int const lda,
        std::complex<double> const *X, int const incX,
        double beta,
        std::complex<double> *Y, int const incY
) {
	std::complex<double> alphaArg(alpha,0);
	std::complex<double> betaArg(beta,0);
	cblas_zgemv(Order, TransA, M, N,
	        reinterpret_cast<cblas_double_complex_type const *>(&alphaArg),
	        reinterpret_cast<cblas_double_complex_type const *>(A), lda,
	        reinterpret_cast<cblas_double_complex_type const *>(X), incX,
	        reinterpret_cast<cblas_double_complex_type const *>(&betaArg),
	        reinterpret_cast<cblas_double_complex_type *>(Y), incY);
}

コード例 #4

ファイルを表示

ファイル: ComplexMatrix.cpp プロジェクト: dhold/Matlab_code

void phi_gemv(const int N, const Complex *alpha, 
                 const Complex *A, const Complex *X,
                 const Complex *beta, Complex *Y){
#ifndef NOBLAS
    #ifdef SINGLEPRECISION
    cblas_cgemv(CblasColMajor,CblasNoTrans,N,N,alpha,A,N,X,1,beta,Y,1);
    #else
    cblas_zgemv(CblasColMajor,CblasNoTrans,N,N,alpha,A,N,X,1,beta,Y,1);
    #endif
#else
    int i,n;
       
    //multiply beta:
    for(i = 0; i < N; ++i)
        Y[i] *= (*beta);

    for(i = 0; i < N; ++i) {
        for (n = 0; n < N; ++n)
            Y[i] += (*alpha)*A[n*N+i]*X[n];
    } 
#endif
}

コード例 #5

ファイルを表示

ファイル: core_zttlqt.c プロジェクト: gpichon/eigenproblems

int CORE_zttlqt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone  = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;
#ifdef COMPLEX
    static int                ione  = 1;
#endif

    PLASMA_Complex64_t alpha;
    int i, j, l, ii, sb, mi, ni;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* TODO: Need to check why some cases require
     *  this to not have uninitialized values */
    CORE_zlaset( PlasmaUpperLower, IB, N,
                 0., 0., T, LDT);

    for(ii = 0; ii < M; ii += IB) {
        sb = min(M-ii, IB);
        for(i = 0; i < sb; i++) {
            j  = ii + i;
            mi = sb-i-1;
            ni = min( j + 1, N);
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:M ).
             */
#ifdef COMPLEX
            LAPACKE_zlacgv_work(ni, &A2[j], LDA2);
            LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1);
#endif
            LAPACKE_zlarfg_work(ni+1, &A1[LDA1*j+j], &A2[j], LDA2, &TAU[j]);

            if (mi > 0) {
                /*
                 * Apply H( j-1 ) to A( j:II+IB-1, j-1:M  ) from the right.
                 */
                cblas_zcopy(
                    mi,
                    &A1[LDA1*j+(j+1)], 1,
                    WORK, 1);

                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                    mi, ni,
                    CBLAS_SADDR(zone), &A2[j+1], LDA2,
                    &A2[j], LDA2,
                    CBLAS_SADDR(zone), WORK, 1);

                alpha = -(TAU[j]);
                cblas_zaxpy(
                    mi, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*j+j+1], 1);

                cblas_zgerc(
                    CblasColMajor, mi, ni,
                    CBLAS_SADDR(alpha), WORK, 1,
                    &A2[j], LDA2,
                    &A2[j+1], LDA2);
            }

            /*
             * Calculate T.
             */

            if (i > 0 ) {

                l = min(i, max(0, N-ii));
                alpha = -(TAU[j]);

                CORE_zpemv(
                        PlasmaNoTrans, PlasmaRowwise,
                        i , min(j, N), l,
                        alpha, &A2[ii], LDA2,
                        &A2[j], LDA2,
                        zzero, &T[LDT*j], 1,
                        WORK);

                /* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */
                cblas_ztrmv(
                        CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                        (CBLAS_TRANSPOSE)PlasmaNoTrans,
                        (CBLAS_DIAG)PlasmaNonUnit,
                        i, &T[LDT*ii], LDT,
                        &T[LDT*j], 1);

            }

#ifdef COMPLEX
            LAPACKE_zlacgv_work(ni, &A2[j], LDA2 );
            LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1 );
#endif

            T[LDT*j+i] = TAU[j];
        }

        /* Apply Q to the rest of the matrix to the right */
        if (M > ii+sb) {
            mi = M-(ii+sb);
            ni = min(ii+sb, N);
            l  = min(sb, max(0, ni-ii));
            CORE_zparfb(
                PlasmaRight, PlasmaNoTrans,
                PlasmaForward, PlasmaRowwise,
                mi, IB, mi, ni, sb, l,
                &A1[LDA1*ii+ii+sb], LDA1,
                &A2[ii+sb], LDA2,
                &A2[ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, M);

        }
    }
    return PLASMA_SUCCESS;
}

コード例 #6

ファイルを表示

ファイル: core_zttqrt.c プロジェクト: joao-lima/plasma-kaapi

int CORE_zttqrt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone  = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;
    static int                ione  = 1;

    PLASMA_Complex64_t alpha;
    int i, j, ii, sb, mi, ni;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:mi, II*IB+I ).
             */
            mi = ii + i + 1;
            LAPACKE_zlarfg_work(mi+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], ione, &TAU[ii+i]);

            if (sb-i-1>0) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
                 */
                ni = sb-i-1;
                cblas_zcopy(
                    ni,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);

#ifdef COMPLEX
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                    mi, ni,
                    CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    CBLAS_SADDR(zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                alpha = -conj(TAU[ii+i]);
                cblas_zaxpy(
                    ni, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                cblas_zgerc(
                    CblasColMajor, mi, ni,
                    CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }

            /*
             * Calculate T.
             */

            if (i > 0 ) {

                cblas_zcopy(i, &A2[LDA2*(ii+i)+ii], 1, &WORK[ii], 1);
    
                cblas_ztrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)PlasmaConjTrans, (CBLAS_DIAG)PlasmaNonUnit,
                    i, &A2[LDA2*ii+ii], LDA2,
                    &WORK[ii], 1);
    
                alpha = -(TAU[ii+i]);
                for(j = 0; j < i; j++) {
                    WORK[ii+j] = alpha * WORK[ii+j];
                }
    
                if (ii > 0) {
                    cblas_zgemv(
                        CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, ii, i,
                        CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
                        &A2[LDA2*(ii+i)], 1,
                        CBLAS_SADDR(zzero), WORK, 1);
    
                    cblas_zaxpy(i, CBLAS_SADDR(zone), &WORK[ii], 1, WORK, 1);
                }
    
                cblas_zcopy(i, WORK, 1, &T[LDT*(ii+i)], 1);
    
                cblas_ztrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit,
                    i, &T[LDT*ii], LDT,
                    &T[LDT*(ii+i)], 1);

            }

            T[LDT*(ii+i)+i] = TAU[ii+i];
        }

        /* Apply Q' to the rest of the matrix to the left  */
        if (N > ii+sb) {
            CORE_zttrfb(
                PlasmaLeft, PlasmaConjTrans,
                PlasmaForward, PlasmaColumnwise,
                sb, N-(ii+sb), ii+sb, N-(ii+sb), sb,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}

コード例 #7

ファイルを表示

ファイル: wrapper.cpp プロジェクト: 2003pro/armadillo

 void wrapper_cblas_zgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha,
                          const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
   {
              cblas_zgemv(Order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY);
   }

コード例 #8

ファイルを表示

ファイル: core_ztsqrt.c プロジェクト: joao-lima/plasma-kaapi

int CORE_ztsqrt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone  = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;

    PLASMA_Complex64_t alpha;
    int i, ii, sb;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(8, "Illegal value of LDA2");
        return -8;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:M, II*IB+I )
             */
            LAPACKE_zlarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]);

            if (ii+i+1 < N) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left
                 */
                alpha = -conj(TAU[ii+i]);
                cblas_zcopy(
                    sb-i-1,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1);
#endif
                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                    M, sb-i-1,
                    CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    CBLAS_SADDR(zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1 );
#endif
                cblas_zaxpy(
                    sb-i-1, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1 );
#endif
                cblas_zgerc(
                    CblasColMajor, M, sb-i-1, CBLAS_SADDR(alpha),
                    &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }
            /*
             * Calculate T
             */
            alpha = -TAU[ii+i];
            cblas_zgemv(
                CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, i,
                CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
                &A2[LDA2*(ii+i)], 1,
                CBLAS_SADDR(zzero), &T[LDT*(ii+i)], 1);

            cblas_ztrmv(
                CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i,
                &T[LDT*ii], LDT,
                &T[LDT*(ii+i)], 1);

            T[LDT*(ii+i)+i] = TAU[ii+i];
        }
        if (N > ii+sb) {
            CORE_ztsmqr(
                PlasmaLeft, PlasmaConjTrans,
                sb, N-(ii+sb), M, N-(ii+sb), IB, IB,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}

コード例 #9

ファイルを表示

ファイル: gsl_cblas_test_gemv.c プロジェクト: aadaa88/mldemos

void
test_gemv (void) {
const double flteps = 1e-4, dbleps = 1e-6;
  {
   int order = 101;
   int trans = 111;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha = 1.0f;
   float beta = -0.3f;
   float A[] = { -0.805f };
   float X[] = { -0.965f };
   int incX = -1;
   float Y[] = { 0.537f };
   int incY = -1;
   float y_expected[] = { 0.615725f };
   cblas_sgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], y_expected[i], flteps, "sgemv(case 774)");
     }
   };
  };


  {
   int order = 102;
   int trans = 111;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha = 1.0f;
   float beta = -0.3f;
   float A[] = { -0.805f };
   float X[] = { -0.965f };
   int incX = -1;
   float Y[] = { 0.537f };
   int incY = -1;
   float y_expected[] = { 0.615725f };
   cblas_sgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], y_expected[i], flteps, "sgemv(case 775)");
     }
   };
  };


  {
   int order = 101;
   int trans = 112;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha = 1.0f;
   float beta = 0.0f;
   float A[] = { -0.805f };
   float X[] = { -0.965f };
   int incX = -1;
   float Y[] = { 0.537f };
   int incY = -1;
   float y_expected[] = { 0.776825f };
   cblas_sgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], y_expected[i], flteps, "sgemv(case 776)");
     }
   };
  };


  {
   int order = 102;
   int trans = 112;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha = 1.0f;
   float beta = 0.0f;
   float A[] = { -0.805f };
   float X[] = { -0.965f };
   int incX = -1;
   float Y[] = { 0.537f };
   int incY = -1;
   float y_expected[] = { 0.776825f };
   cblas_sgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], y_expected[i], flteps, "sgemv(case 777)");
     }
   };
  };


  {
   int order = 101;
   int trans = 111;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha = -0.3;
   double beta = -1;
   double A[] = { -0.047 };
   double X[] = { 0.672 };
   int incX = -1;
   double Y[] = { 0.554 };
   int incY = -1;
   double y_expected[] = { -0.5445248 };
   cblas_dgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], y_expected[i], dbleps, "dgemv(case 778)");
     }
   };
  };


  {
   int order = 102;
   int trans = 111;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha = -0.3;
   double beta = -1;
   double A[] = { -0.047 };
   double X[] = { 0.672 };
   int incX = -1;
   double Y[] = { 0.554 };
   int incY = -1;
   double y_expected[] = { -0.5445248 };
   cblas_dgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], y_expected[i], dbleps, "dgemv(case 779)");
     }
   };
  };


  {
   int order = 101;
   int trans = 112;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha = -1;
   double beta = 1;
   double A[] = { -0.047 };
   double X[] = { 0.672 };
   int incX = -1;
   double Y[] = { 0.554 };
   int incY = -1;
   double y_expected[] = { 0.585584 };
   cblas_dgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], y_expected[i], dbleps, "dgemv(case 780)");
     }
   };
  };


  {
   int order = 102;
   int trans = 112;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha = -1;
   double beta = 1;
   double A[] = { -0.047 };
   double X[] = { 0.672 };
   int incX = -1;
   double Y[] = { 0.554 };
   int incY = -1;
   double y_expected[] = { 0.585584 };
   cblas_dgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], y_expected[i], dbleps, "dgemv(case 781)");
     }
   };
  };


  {
   int order = 101;
   int trans = 111;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha[2] = {0.0f, 0.1f};
   float beta[2] = {0.0f, 1.0f};
   float A[] = { 0.629f, 0.801f };
   float X[] = { 0.778f, -0.073f };
   int incX = -1;
   float Y[] = { -0.976f, -0.682f };
   int incY = -1;
   float y_expected[] = { 0.624274f, -0.921216f };
   cblas_cgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 782) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 782) imag");
     };
   };
  };


  {
   int order = 102;
   int trans = 111;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha[2] = {0.0f, 0.1f};
   float beta[2] = {0.0f, 1.0f};
   float A[] = { 0.629f, 0.801f };
   float X[] = { 0.778f, -0.073f };
   int incX = -1;
   float Y[] = { -0.976f, -0.682f };
   int incY = -1;
   float y_expected[] = { 0.624274f, -0.921216f };
   cblas_cgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 783) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 783) imag");
     };
   };
  };


  {
   int order = 101;
   int trans = 112;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha[2] = {0.0f, 1.0f};
   float beta[2] = {-0.3f, 0.1f};
   float A[] = { 0.629f, 0.801f };
   float X[] = { 0.778f, -0.073f };
   int incX = -1;
   float Y[] = { -0.976f, -0.682f };
   int incY = -1;
   float y_expected[] = { -0.216261f, 0.654835f };
   cblas_cgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 784) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 784) imag");
     };
   };
  };


  {
   int order = 102;
   int trans = 112;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha[2] = {0.0f, 1.0f};
   float beta[2] = {-0.3f, 0.1f};
   float A[] = { 0.629f, 0.801f };
   float X[] = { 0.778f, -0.073f };
   int incX = -1;
   float Y[] = { -0.976f, -0.682f };
   int incY = -1;
   float y_expected[] = { -0.216261f, 0.654835f };
   cblas_cgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 785) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 785) imag");
     };
   };
  };


  {
   int order = 101;
   int trans = 113;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha[2] = {0.0f, 0.1f};
   float beta[2] = {-0.3f, 0.1f};
   float A[] = { 0.629f, 0.801f };
   float X[] = { 0.778f, -0.073f };
   int incX = -1;
   float Y[] = { -0.976f, -0.682f };
   int incY = -1;
   float y_expected[] = { 0.427909f, 0.150089f };
   cblas_cgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 786) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 786) imag");
     };
   };
  };


  {
   int order = 102;
   int trans = 113;
   int M = 1;
   int N = 1;
   int lda = 1;
   float alpha[2] = {0.0f, 0.1f};
   float beta[2] = {-0.3f, 0.1f};
   float A[] = { 0.629f, 0.801f };
   float X[] = { 0.778f, -0.073f };
   int incX = -1;
   float Y[] = { -0.976f, -0.682f };
   int incY = -1;
   float y_expected[] = { 0.427909f, 0.150089f };
   cblas_cgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 787) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 787) imag");
     };
   };
  };


  {
   int order = 101;
   int trans = 111;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha[2] = {0, 0.1};
   double beta[2] = {1, 0};
   double A[] = { 0.932, -0.724 };
   double X[] = { 0.334, -0.317 };
   int incX = -1;
   double Y[] = { 0.348, 0.07 };
   int incY = -1;
   double y_expected[] = { 0.401726, 0.078178 };
   cblas_zgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 788) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 788) imag");
     };
   };
  };


  {
   int order = 102;
   int trans = 111;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha[2] = {0, 0.1};
   double beta[2] = {1, 0};
   double A[] = { 0.932, -0.724 };
   double X[] = { 0.334, -0.317 };
   int incX = -1;
   double Y[] = { 0.348, 0.07 };
   int incY = -1;
   double y_expected[] = { 0.401726, 0.078178 };
   cblas_zgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 789) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 789) imag");
     };
   };
  };


  {
   int order = 101;
   int trans = 112;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha[2] = {-0.3, 0.1};
   double beta[2] = {0, 1};
   double A[] = { 0.932, -0.724 };
   double X[] = { 0.334, -0.317 };
   int incX = -1;
   double Y[] = { 0.348, 0.07 };
   int incY = -1;
   double y_expected[] = { -0.040808, 0.517356 };
   cblas_zgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 790) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 790) imag");
     };
   };
  };


  {
   int order = 102;
   int trans = 112;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha[2] = {-0.3, 0.1};
   double beta[2] = {0, 1};
   double A[] = { 0.932, -0.724 };
   double X[] = { 0.334, -0.317 };
   int incX = -1;
   double Y[] = { 0.348, 0.07 };
   int incY = -1;
   double y_expected[] = { -0.040808, 0.517356 };
   cblas_zgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 791) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 791) imag");
     };
   };
  };


  {
   int order = 101;
   int trans = 113;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha[2] = {1, 0};
   double beta[2] = {0, 0};
   double A[] = { 0.932, -0.724 };
   double X[] = { 0.334, -0.317 };
   int incX = -1;
   double Y[] = { 0.348, 0.07 };
   int incY = -1;
   double y_expected[] = { 0.540796, -0.053628 };
   cblas_zgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 792) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 792) imag");
     };
   };
  };


  {
   int order = 102;
   int trans = 113;
   int M = 1;
   int N = 1;
   int lda = 1;
   double alpha[2] = {1, 0};
   double beta[2] = {0, 0};
   double A[] = { 0.932, -0.724 };
   double X[] = { 0.334, -0.317 };
   int incX = -1;
   double Y[] = { 0.348, 0.07 };
   int incY = -1;
   double y_expected[] = { 0.540796, -0.053628 };
   cblas_zgemv(order, trans, M, N, alpha, A, lda, X,                                  incX, beta, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 793) real");
       gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 793) imag");
     };
   };
  };


}

コード例 #10

ファイルを表示

ファイル: cddg_inference.c プロジェクト: jbloomlab/pips-1.0

// This dMList function creates a list of the dM values giving the probabilities of mutations
static PyObject *dMList(PyObject *self, PyObject *args) {
    // Calling variables are (in order): uts, only_need, n_aa, length, grs, dmlist, residue_to_compute, iwt, brs
    PyObject *uts, *only_need, *grs, *r_grs_tuple, *gr_diag, *p, *p_inv, *dmlist, *brs, *brz;
    long n_aa, length, n_aa2, n_uts, residue_to_compute, i_ut, x, y, index, only_need_index, only_need_i, only_need_i_n_aa, index2, iwt, n_aa3, z;
    double *arr_dmlist, *cp_inv, *cp, *cgr_diag, *cexpd, *cbrz, *cvrz, *cvrz_p_inv;
    complex double *complex_cp_inv, *complex_cp, *complex_cgr_diag, *complex_cexpd, *complex_naa2_list, *complex_naa_list, *complex_cvrz, *complex_cvrz_p_inv, *complex_cbrz;
    double ut, exp_utdx, exp_utdy, dx, dy;
    complex double complex_exp_utdx, complex_exp_utdy, complex_dx, complex_dy;
    int array_type;
#ifdef USE_ACCELERATE_CBLAS
    complex double complex_one = 1, complex_zero = 0;
#else
    complex double complex_dmxy, complex_v_p_inv_xy;
    double dmxy, v_p_inv_xy;
    long yindex, irowcolumn;
#endif
    // Parse the arguments.  
    if (! PyArg_ParseTuple( args, "O!O!llO!O!llO!", &PyList_Type, &uts, &PyList_Type, &only_need, &n_aa, &length, &PyList_Type, &grs, &PyArray_Type, &dmlist, &residue_to_compute, &iwt, &PyList_Type, &brs)) {
        PyErr_SetString(PyExc_TypeError, "Invalid calling arguments to dMList.");
        return NULL;
    }
    // Error checking on arguments
    if (length < 1) { // length of the protein
        PyErr_SetString(PyExc_ValueError, "length is less than one.");
        return NULL;
    }
    if (n_aa < 1) { // number of amino acids.  Normally will be 20.
        PyErr_SetString(PyExc_ValueError, "n_aa is less than one.");
        return NULL;
    }
    if (PyList_GET_SIZE(grs) != length) { // make sure grs is of the same size as length
        PyErr_SetString(PyExc_ValueError, "grs is not of the same size as length.");
        return NULL;
    }
    n_uts = PyList_GET_SIZE(uts); // number of entries in uts
    if (n_uts < 1) { // make sure there are entries in uts 
        PyErr_SetString(PyExc_ValueError, "uts has no entries.");
        return NULL;
    }
    if (PyList_GET_SIZE(only_need) != n_uts * length) { // make sure only_need is of correct size
        PyErr_SetString(PyExc_ValueError, "only_need is of wrong size.");
        return NULL;
    }
    if (! ((residue_to_compute >= 0) && (residue_to_compute < length))) {
        PyErr_SetString(PyExc_ValueError, "Invalid value for residue_to_compute.");
        return NULL;
    }
    if (! ((iwt >= 0) && (iwt < n_aa))) {
        PyErr_SetString(PyExc_ValueError, "Invalid value for iwt.");
        return NULL;
    }
    if (PyList_GET_SIZE(brs) != n_aa) { // make sure brs has one entry for each amino acid
        PyErr_SetString(PyExc_ValueError, "brs is not of length equal to n_aa");
        return NULL;
    }
    n_aa2 = n_aa * n_aa; // square of the number of amino acids
    n_aa3 = n_aa2 * n_aa; // cube of the number of amino acids
    // The results will be returned in a numpy ndarray 'float_' (C type double) array called dmlist.
    // This array will be of size length * n_uts * n_aa3.
    arr_dmlist = (double *) PyArray_DATA(dmlist); // this is the data array of dmlist
    long const sizeof_cexpd = n_aa2 * sizeof(double);
    long const complex_sizeof_cexpd = n_aa2 * sizeof(complex double);
    // gr_diag, p, and p_inv are the eigenvalues, left, and right diagonalizing matrices of gr
    r_grs_tuple = PyList_GET_ITEM(grs, residue_to_compute);
    gr_diag = PyTuple_GET_ITEM(r_grs_tuple, 0); 
    p = PyTuple_GET_ITEM(r_grs_tuple, 1);
    p_inv = PyTuple_GET_ITEM(r_grs_tuple, 2);
    // Now begin filling arr_dmlist with the appropriate values
    index = 0;
    only_need_index = residue_to_compute * n_uts;
    // determine if these arrays are complex double or real doubles
    array_type = PyArray_TYPE(gr_diag);
    if (array_type == NPY_DOUBLE) { // array is of doubles, real not complex
        // Note that these next assignments assume that the arrays are C-style contiguous
        cp = PyArray_DATA(p);
        cp_inv = PyArray_DATA(p_inv);
        cgr_diag = PyArray_DATA(gr_diag);
        cexpd = (double *) malloc(sizeof_cexpd); // allocate memory
        cvrz = (double *) malloc(sizeof_cexpd); // allocate memory
        cvrz_p_inv = (double *) malloc(sizeof_cexpd); // allocate memory
        for (i_ut = 0; i_ut < n_uts; i_ut++) { // loop over ut values
            only_need_i = PyInt_AS_LONG(PyList_GET_ITEM(only_need, only_need_index)); // value of only_need
            only_need_index++;
            if (only_need_i == -1) { // we don't need to do anything for these entries in dmlist
                index += n_aa3;
            } else { // we need to compute at least some entries in dmlist
                ut = PyFloat_AS_DOUBLE(PyList_GET_ITEM(uts, i_ut));  // ut value
                // Entries of cexpd are defined by D_xy = (exp(ut d_x) - exp(ut d_y)) / (d_x - d_y)
                // for x != y, and D_yy = ut exp(d_x ut)
                index2 = 0;
                for (x = 0; x < n_aa; x++) {
                    dx = cgr_diag[x];
                    exp_utdx = exp(ut * dx);
                    for (y = 0; y < n_aa; y++) {
                        if (y == x) {
                            cexpd[index2] = ut * exp_utdx;
                        } else {
                            dy = cgr_diag[y];
                            exp_utdy = exp(ut * dy);
                            cexpd[index2] = (exp_utdx - exp_utdy) / (dx - dy);
                        }
                        index2++;
                    }
                }
                for (z = 0; z < n_aa; z++) {
                    // compute derivative with respect to z
                    if (z == iwt) { // don't compute values with respect to wildtype
                        index += n_aa2;
                        continue;
                    }
                    brz = PyList_GET_ITEM(brs, z);
                    cbrz = PyArray_DATA(brz);
                    // cvrz is the element-by-element product of cbrz and cexpd
                    for (index2 = 0; index2 < n_aa2; index2++) {
                        cvrz[index2] = cbrz[index2] * cexpd[index2];
                    }
#ifdef USE_ACCELERATE_CBLAS
                    // multiply the matrices using cblas_dgemm
                    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n_aa, n_aa, n_aa, (double) 1.0, cvrz, n_aa, cp_inv, n_aa, (double) 0.0, cvrz_p_inv, n_aa); // multiply cvrz and cp_inv into cvrz_p_inv
#else
                    // multiply the matrices in pure C code
                    // multiply cvrz and cp_inv into cvrz_p_inv
                    index2 = 0;
                    for (x = 0; x < n_aa2; x += n_aa) {
                        for (y = 0; y < n_aa; y++) {
                            v_p_inv_xy = 0.0;
                            yindex = y;
                            for (irowcolumn = x; irowcolumn < x + n_aa; irowcolumn++) {
                                v_p_inv_xy += cvrz[irowcolumn] * cp_inv[yindex];
                                yindex += n_aa;
                            }
                            cvrz_p_inv[index2++] = v_p_inv_xy;
                        }
                    }
#endif
                    if (only_need_i == -2) { // we need to compute all of these entries in dmlist
#ifdef USE_ACCELERATE_CBLAS
                        cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n_aa, n_aa, n_aa, (double) 1.0, cp, n_aa, cvrz_p_inv, n_aa, (double) 0.0, &arr_dmlist[index], n_aa); // multiply cp and cvrz_p_inv into arr_dmlist[index : index + n_aa2]
                        index += n_aa2;
#else
                        // multiply the matrices in pure C code, and fill dmlist with the results
                        // multiply cp and cvrz_p_inv into arr_dmlist[index : index + n_aa2]
                        for (x = 0; x < n_aa2; x += n_aa) {
                            for (y = 0; y < n_aa; y++) {
                                dmxy = 0.0;
                                yindex = y;
                                for (irowcolumn = x; irowcolumn < x + n_aa; irowcolumn++) {
                                    dmxy += cp[irowcolumn] * cvrz_p_inv[yindex];
                                    yindex += n_aa;
                                }
                                arr_dmlist[index++] = dmxy;
                            }
                        }
#endif
                    } else { // we need to compute entries in dmlist only for x = only_need_i
                        only_need_i_n_aa = only_need_i * n_aa;
                        index += only_need_i_n_aa;
#ifdef USE_ACCELERATE_CBLAS
                        // do the matrix vector multiplication using cblas, and put results in dmlist
                        cblas_dgemv(CblasRowMajor, CblasTrans, n_aa, n_aa, (double) 1.0, cvrz_p_inv, n_aa, &cp[only_need_i_n_aa], 1, (double) 0.0, &arr_dmlist[index], 1); 
                        index += n_aa2 - only_need_i_n_aa;
#else
                        // do the matrix vector multiplication in pure C code, and put results in mlist
                        for (y = 0; y < n_aa; y++) {
                            dmxy = 0.0;
                            yindex = y;
                            for (irowcolumn = only_need_i_n_aa; irowcolumn < only_need_i_n_aa + n_aa; irowcolumn++) {
                                dmxy += cp[irowcolumn] * cvrz_p_inv[yindex];
                                yindex += n_aa;
                            }
                            arr_dmlist[index++] = dmxy;
                        }
                        index += n_aa2 - only_need_i_n_aa - n_aa;
#endif
                    }
                }
            }
        }
        free(cexpd);
        free(cvrz);
        free(cvrz_p_inv);
    } else if (array_type == NPY_CDOUBLE) { // array is of complex doubles
        // Note that these next assignments assume that the arrays are C-style contiguous
        complex_cp = PyArray_DATA(p);
        complex_cp_inv = PyArray_DATA(p_inv);
        complex_cgr_diag = PyArray_DATA(gr_diag);
        complex_cexpd = (complex double *) malloc(complex_sizeof_cexpd); // allocate memory
        complex_cvrz = (complex double *) malloc(complex_sizeof_cexpd); // allocate memory
        complex_cvrz_p_inv = (complex double *) malloc(complex_sizeof_cexpd); // allocate memory
        complex_naa2_list = (complex double *) malloc(n_aa2 * sizeof(complex double)); // allocate memory
        complex_naa_list = (complex double *) malloc(n_aa * sizeof(complex double)); // allocate memory
        for (i_ut = 0; i_ut < n_uts; i_ut++) { // loop over ut values
            only_need_i = PyInt_AS_LONG(PyList_GET_ITEM(only_need, only_need_index)); // value of only_need
            only_need_index++;
            if (only_need_i == -1) { // we don't need to do anything for these entries in dmlist
                index += n_aa3;
            } else { // we need to compute at least some entries in dmlist
                ut = PyFloat_AS_DOUBLE(PyList_GET_ITEM(uts, i_ut));  // ut value
                // Entries of cexpd are defined by D_xy = (exp(ut d_x) - exp(ut d_y)) / (d_x - d_y)
                // for x != y, and D_yy = ut exp(d_x ut)
                index2 = 0;
                for (x = 0; x < n_aa; x++) {
                    complex_dx = complex_cgr_diag[x];
                    complex_exp_utdx = cexp(ut * complex_dx);
                    for (y = 0; y < n_aa; y++) {
                        if (y == x) {
                            complex_cexpd[index2] = ut * complex_exp_utdx;
                        } else {
                            complex_dy = complex_cgr_diag[y];
                            complex_exp_utdy = cexp(ut * complex_dy);
                            complex_cexpd[index2] = (complex_exp_utdx - complex_exp_utdy) / (complex_dx - complex_dy);
                        }
                        index2++;
                    }
                }
                for (z = 0; z < n_aa; z++) {
                    // compute derivative with respect to z
                    if (z == iwt) { // don't compute values with respect to wildtype
                        index += n_aa2;
                        continue;
                    }
                    brz = PyList_GET_ITEM(brs, z);
                    complex_cbrz = PyArray_DATA(brz);
                    // cvrz is the element-by-element product of cbrz and cexpd
                    for (index2 = 0; index2 < n_aa2; index2++) {
                        complex_cvrz[index2] = complex_cbrz[index2] * complex_cexpd[index2];
                    }
#ifdef USE_ACCELERATE_CBLAS
                    // multiply the matrices using cblas_zgemm
                    cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n_aa, n_aa, n_aa, &complex_one, complex_cvrz, n_aa, complex_cp_inv, n_aa, &complex_zero, complex_cvrz_p_inv, n_aa); // multiply cvrz and cp_inv into cvrz_p_inv
#else
                    // multiply the matrices in pure C code
                    // multiply cvrz and cp_inv into cvrz_p_inv
                    index2 = 0;
                    for (x = 0; x < n_aa2; x += n_aa) {
                        for (y = 0; y < n_aa; y++) {
                            complex_v_p_inv_xy = 0.0;
                            yindex = y;
                            for (irowcolumn = x; irowcolumn < x + n_aa; irowcolumn++) {
                                complex_v_p_inv_xy += complex_cvrz[irowcolumn] * complex_cp_inv[yindex];
                                yindex += n_aa;
                            }
                            complex_cvrz_p_inv[index2++] = complex_v_p_inv_xy;
                        }
                    }
#endif
                    if (only_need_i == -2) { // we need to compute all of these entries in dmlist
#ifdef USE_ACCELERATE_CBLAS
                        cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n_aa, n_aa, n_aa, &complex_one, complex_cp, n_aa, complex_cvrz_p_inv, n_aa, &complex_zero, complex_naa2_list, n_aa); // multiply cp and cvrz_p_inv into arr_dmlist[index : index + n_aa2]
                        for (index2 = 0; index2 < n_aa2; index2++) {
                            arr_dmlist[index + index2] = creal(complex_naa2_list[index2]);
                        }
                        index += n_aa2;
#else
                        // multiply the matrices in pure C code, and fill dmlist with the results
                        // multiply cp and cvrz_p_inv into arr_dmlist[index : index + n_aa2]
                        for (x = 0; x < n_aa2; x += n_aa) {
                            for (y = 0; y < n_aa; y++) {
                                complex_dmxy = 0.0;
                                yindex = y;
                                for (irowcolumn = x; irowcolumn < x + n_aa; irowcolumn++) {
                                    complex_dmxy += complex_cp[irowcolumn] * complex_cvrz_p_inv[yindex];
                                    yindex += n_aa;
                                }
                                arr_dmlist[index++] = creal(complex_dmxy);
                            }
                        }
#endif
                    } else { // we need to compute entries in dmlist only for x = only_need_i
                        only_need_i_n_aa = only_need_i * n_aa;
                        index += only_need_i_n_aa;
#ifdef USE_ACCELERATE_CBLAS
                        // do the matrix vector multiplication using cblas, and put results in dmlist
                        cblas_zgemv(CblasRowMajor, CblasTrans, n_aa, n_aa, &complex_one, complex_cvrz_p_inv, n_aa, &complex_cp[only_need_i_n_aa], 1, &complex_zero, complex_naa_list, 1); 
                        for (index2 = 0; index2 < n_aa; index2++) {
                            arr_dmlist[index + index2] = creal(complex_naa_list[index2]);
                        }
                        index += n_aa2 - only_need_i_n_aa;
#else
                        // do the matrix vector multiplication in pure C code, and put results in mlist
                        for (y = 0; y < n_aa; y++) {
                            complex_dmxy = 0.0;
                            yindex = y;
                            for (irowcolumn = only_need_i_n_aa; irowcolumn < only_need_i_n_aa + n_aa; irowcolumn++) {
                                complex_dmxy += complex_cp[irowcolumn] * complex_cvrz_p_inv[yindex];
                                yindex += n_aa;
                            }
                            arr_dmlist[index++] = creal(complex_dmxy);
                        }
                        index += n_aa2 - only_need_i_n_aa - n_aa;
#endif
                    }
                }
            }
        }
        free(complex_cexpd);
        free(complex_cvrz);
        free(complex_cvrz_p_inv);
        free(complex_naa2_list);
        free(complex_naa_list);
    } else { // array is of neither real nor complex doubles
        PyErr_SetString(PyExc_ValueError, "matrices are neither double nor complex doubles.");
        return NULL;
    }
    return PyInt_FromLong((long) 1);
}

コード例 #11

ファイルを表示

ファイル: cddg_inference.c プロジェクト: jbloomlab/pips-1.0

// This MList function creates a list of the M values giving the probabilities of mutations
static PyObject *MList(PyObject *self, PyObject *args) {
    // Calling variables are (in order): uts, only_need, n_aa, length, grs, mlist, residue_to_compute
    PyObject *uts, *only_need, *grs, *r_grs_tuple, *gr_diag, *p, *p_inv, *mlist;
    long n_aa, length, n_aa2, n_uts, residue_to_compute, i_ut, y, index, irowcolumn, only_need_index, only_need_i, only_need_i_n_aa;
    double *arr_mlist, *cp_inv, *cp, *cgr_diag, *cp_inv_exp;
    complex double *complex_cp_inv, *complex_cp, *complex_cgr_diag, *complex_cp_inv_exp, *complex_naa2_list, *complex_naa_list;
    double ut, exp_ut;
    complex double complex_exp_ut;
    int array_type;
#ifdef USE_ACCELERATE_CBLAS
    complex double complex_one = 1, complex_zero = 0;
    long index2;
#else
    complex double complex_mxy;
    double mxy;
    long x, yindex;
#endif
    // Parse the arguments.  
    if (! PyArg_ParseTuple( args, "O!O!llO!O!l", &PyList_Type, &uts, &PyList_Type, &only_need, &n_aa, &length, &PyList_Type, &grs, &PyArray_Type, &mlist, &residue_to_compute)) {
        PyErr_SetString(PyExc_TypeError, "Invalid calling arguments to MList.");
        return NULL;
    }
    // Error checking on arguments
    if (length < 1) { // length of the protein
        PyErr_SetString(PyExc_ValueError, "length is less than one.");
        return NULL;
    }
    if (n_aa < 1) { // number of amino acids.  Normally will be 20.
        PyErr_SetString(PyExc_ValueError, "n_aa is less than one.");
        return NULL;
    }
    if (PyList_GET_SIZE(grs) != length) { // make sure grs is of the same size as length
        // PyErr_SetString(PyExc_ValueError, "grs is not of the same size as length.");
        char errstring[200];
        sprintf(errstring, "grs is not of the same size as length: %ld, %ld", PyList_GET_SIZE(grs), length);
        PyErr_SetString(PyExc_ValueError, errstring);
        return NULL;
    }
    n_uts = PyList_GET_SIZE(uts); // number of entries in uts
    if (n_uts < 1) { // make sure there are entries in uts 
        PyErr_SetString(PyExc_ValueError, "uts has no entries.");
        return NULL;
    }
    if (PyList_GET_SIZE(only_need) != n_uts * length) { // make sure only_need is of correct size
        PyErr_SetString(PyExc_ValueError, "only_need is of wrong size.");
        return NULL;
    }
    if (! ((residue_to_compute >= 0) && (residue_to_compute < length))) {
        char errstring[200];
        sprintf(errstring, "Invalid value for residue_to_compute: %ld, %ld", residue_to_compute, length);
        PyErr_SetString(PyExc_ValueError, errstring);
        return NULL;
    }
    n_aa2 = n_aa * n_aa; // square of the number of amino acids
    // The results will be returned in a numpy ndarray 'float_' (C type double) array called mlist.
    // This array will be of size length * n_uts * n_aa2.
    arr_mlist = (double *) PyArray_DATA(mlist); // this is the data array of mlist
    long const sizeof_cp_inv = n_aa2 * sizeof(double);
    long const complex_sizeof_cp_inv = n_aa2 * sizeof(complex double);
    // Now begin filling arr_mlist with the appropriate values
    index = 0;
    only_need_index = residue_to_compute * n_uts;
    r_grs_tuple = PyList_GET_ITEM(grs, residue_to_compute);
    // gr_diag, p, and p_inv are the eigenvalues, left, and right diagonalizing matrices of gr
    gr_diag = PyTuple_GET_ITEM(r_grs_tuple, 0); 
    p = PyTuple_GET_ITEM(r_grs_tuple, 1);
    p_inv = PyTuple_GET_ITEM(r_grs_tuple, 2);
    // determine if these arrays are complex double or real doubles
    array_type = PyArray_TYPE(gr_diag);
    if (array_type == NPY_DOUBLE) { // array is of doubles, real not complex
        cp_inv_exp = (double *) malloc(sizeof_cp_inv); // allocate memory
        // Note that these next assignments assume that the arrays are C-style contiguous
        cp = PyArray_DATA(p);
        cp_inv = PyArray_DATA(p_inv);
        cgr_diag = PyArray_DATA(gr_diag);
        for (i_ut = 0; i_ut < n_uts; i_ut++) { // loop over ut values
            only_need_i = PyInt_AS_LONG(PyList_GET_ITEM(only_need, only_need_index)); // value of only_need
            only_need_index++;
            if (only_need_i == -1) { // we don't need to do anything for these entries in mlist
                index += n_aa2;
            } else { // we need to compute at least some entries in mlist
                ut = PyFloat_AS_DOUBLE(PyList_GET_ITEM(uts, i_ut));  // ut value
                for (irowcolumn = 0; irowcolumn < n_aa; irowcolumn++) {
                    exp_ut = exp(ut * cgr_diag[irowcolumn]);
                    for (y = irowcolumn * n_aa; y < (irowcolumn + 1) * n_aa; y++) {
                        cp_inv_exp[y] = cp_inv[y] * exp_ut;
                    }
                }
                if (only_need_i == -2) { // we need to compute all of these entries in mlist
#ifdef USE_ACCELERATE_CBLAS
                    // multiply the matrices using cblas_dgemm, and fill mlist with the results
                    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n_aa, n_aa, n_aa, (double) 1.0, cp, n_aa, cp_inv_exp, n_aa, (double) 0.0, &arr_mlist[index], n_aa);
                    for (index2 = index; index2 < index + n_aa2; index2++) {
                        arr_mlist[index2] = fabs(arr_mlist[index2]);
                    }
                    index += n_aa2;
#else
                    // multiply the matrices in pure C code, and fill mlist with the results
                    for (x = 0; x < n_aa2; x += n_aa) {
                        for (y = 0; y < n_aa; y++) {
                            mxy = 0.0;
                            yindex = y;
                            for (irowcolumn = x; irowcolumn < x + n_aa; irowcolumn++) {
                                mxy += cp[irowcolumn] * cp_inv_exp[yindex];
                                yindex += n_aa;
                            }
                            arr_mlist[index++] = fabs(mxy);
                        }
                    }
#endif
                } else { // we need to compute entries in mlist only for x = only_need_i
                    only_need_i_n_aa = only_need_i * n_aa;
                    index += only_need_i_n_aa;
#ifdef USE_ACCELERATE_CBLAS
                    // do the matrix vector multiplication using cblas, and put results in mlist
                    cblas_dgemv(CblasRowMajor, CblasTrans, n_aa, n_aa, (double) 1.0, cp_inv_exp, n_aa, &cp[only_need_i_n_aa], 1, (double) 0.0, &arr_mlist[index], 1); 
                    for (index2 = index; index2 < index + n_aa2 - only_need_i_n_aa; index2++) {
                        arr_mlist[index2] = fabs(arr_mlist[index2]);
                    }
                    index += n_aa2 - only_need_i_n_aa;
#else
                    // do the matrix vector multiplication in pure C code, and put results in mlist
                    for (y = 0; y < n_aa; y++) {
                        mxy = 0.0;
                        yindex = y;
                        for (irowcolumn = only_need_i_n_aa; irowcolumn < only_need_i_n_aa + n_aa; irowcolumn++) {
                            mxy += cp[irowcolumn] * cp_inv_exp[yindex];
                            yindex += n_aa;
                        }
                        arr_mlist[index++] = fabs(mxy);
                    }
                    index += n_aa2 - only_need_i_n_aa - n_aa;
#endif
                }
            }
        }
        free(cp_inv_exp);
    } else if (array_type == NPY_CDOUBLE) { // array is complex doubles
        complex_cp_inv_exp = (complex double *) malloc(complex_sizeof_cp_inv); // allocate memory
        complex_naa2_list = (complex double *) malloc(n_aa2 * sizeof(complex double)); // allocate memory
        complex_naa_list = (complex double *) malloc(n_aa * sizeof(complex double)); // allocate memory
        // Note that these next assignments assume that the arrays are C-style contiguous
        complex_cp = PyArray_DATA(p);
        complex_cp_inv = PyArray_DATA(p_inv);
        complex_cgr_diag = PyArray_DATA(gr_diag);
        for (i_ut = 0; i_ut < n_uts; i_ut++) { // loop over ut values
            only_need_i = PyInt_AS_LONG(PyList_GET_ITEM(only_need, only_need_index)); // value of only_need
            only_need_index++;
            if (only_need_i == -1) { // we don't need to do anything for these entries in mlist
                index += n_aa2;
            } else { // we need to compute at least some entries in mlist
                ut = PyFloat_AS_DOUBLE(PyList_GET_ITEM(uts, i_ut));  // ut value
                for (irowcolumn = 0; irowcolumn < n_aa; irowcolumn++) {
                    complex_exp_ut = cexp(ut * complex_cgr_diag[irowcolumn]);
                    for (y = irowcolumn * n_aa; y < (irowcolumn + 1) * n_aa; y++) {
                        complex_cp_inv_exp[y] = complex_cp_inv[y] * complex_exp_ut;
                    }
                }
                if (only_need_i == -2) { // we need to compute all of these entries in mlist
#ifdef USE_ACCELERATE_CBLAS
                    // multiply the matrices using cblas_zgemm, and fill mlist with the results
                    cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n_aa, n_aa, n_aa, &complex_one, complex_cp, n_aa, complex_cp_inv_exp, n_aa, &complex_zero, complex_naa2_list, n_aa); 
                    for (index2 = 0; index2 < n_aa2; index2++) {
                        arr_mlist[index + index2] = cabs(complex_naa2_list[index2]);
                    }
                    index += n_aa2;
#else
                    // multiply the matrices in pure C code, and fill mlist with the results
                    for (x = 0; x < n_aa2; x += n_aa) {
                        for (y = 0; y < n_aa; y++) {
                            complex_mxy = (complex double) 0.0;
                            yindex = y;
                            for (irowcolumn = x; irowcolumn < x + n_aa; irowcolumn++) {
                                complex_mxy += complex_cp[irowcolumn] * complex_cp_inv_exp[yindex];
                                yindex += n_aa;
                            }
                            arr_mlist[index++] = cabs(complex_mxy);
                        }
                    }
#endif
                } else { // we need to compute entries in mlist only for x = only_need_i
                    only_need_i_n_aa = only_need_i * n_aa;
                    index += only_need_i_n_aa;
#ifdef USE_ACCELERATE_CBLAS
                    // do the matrix vector multiplication using cblas, and put results in mlist
                    cblas_zgemv(CblasRowMajor, CblasTrans, n_aa, n_aa, &complex_one, complex_cp_inv_exp, n_aa, &complex_cp[only_need_i_n_aa], 1, &complex_zero, complex_naa_list, 1);  
                    for (index2 = 0; index2 < n_aa; index2++) {
                        arr_mlist[index + index2] = cabs(complex_naa_list[index2]);
                    }
                    index += n_aa2 - only_need_i_n_aa;
#else
                    // do the matrix vector multiplication in pure C code, and put results in mlist
                    for (y = 0; y < n_aa; y++) {
                        complex_mxy = (complex double) 0.0;
                        yindex = y;
                        for (irowcolumn = only_need_i_n_aa; irowcolumn < only_need_i_n_aa + n_aa; irowcolumn++) {
                            complex_mxy += complex_cp[irowcolumn] * complex_cp_inv_exp[yindex];
                            yindex += n_aa;
                        }
                        arr_mlist[index++] = cabs(complex_mxy);
                    }
                    index += n_aa2 - only_need_i_n_aa - n_aa;
#endif
                }
            }
        }
        free(complex_cp_inv_exp);
        free(complex_naa2_list);
        free(complex_naa_list);
    } else { // array is of neither real nor complex doubles
        PyErr_SetString(PyExc_ValueError, "gr entries are neither double nor complex doubles.");
        return NULL;
    }
    return PyInt_FromLong((long) 1);
}

コード例 #12

ファイルを表示

ファイル: stateset.cpp プロジェクト: borunda/itp2d

void StateSet::orthonormalize() throw(std::exception) {
	// Handle the trivial case N=1 separately
	if (N == 1) {
		ortho_timer.start();
		const double norm = (*state_array)[0].norm();
		(*state_array)[0] *= 1.0/norm;
		ortho_timer.stop();
		return;
	}
	ortho_timer.start();
	dot_timer.start();
	// NOTE: Because Eigensolver uses LAPACK, the overlap matrix is stored in column-major format
	#pragma omp parallel for
	for (size_t i=0; i<N; i++) {
		for (size_t j=i; j<N; j++) {
			overlapmatrix[N*j+i] = dot(i,j);
		}
	}
	dot_time += dot_timer.stop();
	// Solve eigenvalue problem for the overlap matrix
	eigensolve_timer.start();
	ESolver.solve(overlapmatrix);
	for (size_t n=0; n<N; n++) {
		const double eval = ESolver.eigenvalue(n);
		// Check that eigenvalues are OK. If states are propagated "too much",
		// they can become linearly dependent (or close enough so), which
		// causes the overlap matrix to have non-positive eigenvalues and as a
		// result the orthonormalization will fail.
		if (eval <= 0) {
			ortho_time += ortho_timer.stop();
			eigensolve_time += eigensolve_timer.stop();
			throw(NonPositiveEigenvalue(n, eval, overlapmatrix, N));
		}
		else if (std::fpclassify(eval) != FP_NORMAL) {
			ortho_time += ortho_timer.stop();
			eigensolve_time += eigensolve_timer.stop();
			throw(NonNormalEigenvalue(n, eval, overlapmatrix, N));
		}
		// Scale eigenvectors with the eigenvalues
		ESolver.scale_eigenvector(overlapmatrix, n, 1/sqrt(eval));
	}
	eigensolve_time += eigensolve_timer.stop();
	// Form orthonormal states from linear combinations
	lincomb_timer.start();
	const comp one = 1;
	const comp zero = 0;
	const int iN = static_cast<int>(N);
	const int iM = static_cast<int>(datalayout.N);
	comp* const statedata = state_array->get_dataptr();
	switch (ortho_algorithm) {
		case Default:
			// This is the in-place version, which uses less memory
			#pragma omp parallel
			{
				const size_t required_size = N*omp_get_num_threads();
				const size_t thread_offset = N*omp_get_thread_num();
				#pragma omp single
				{
				// Check for enough space on tempstate
				if (tempstate.size() < required_size)
					tempstate.resize(required_size);
				}
				comp* const temp = tempstate.data() + thread_offset;
				comp c;
				#pragma omp for
				for (size_t t=0; t<datalayout.N; t++) {
					// Save old state values
					cblas_zcopy(iN, statedata+t, iM, temp, 1);
					// Note that now overlapmatrix holds the eigenvectors
					cblas_zgemv(CblasRowMajor, CblasNoTrans, iN, iN, &one, overlapmatrix,
							iN, temp, 1, &zero, statedata+t, iM);
				}
			}
			break;
		case HighMem:
			// This is the out-of-place version, where the formation of linear
			// combinations can be expressed simply as a product of two (very
			// large) matrices.
			comp* const other_statedata = other_state_array->get_dataptr();
			assert(statedata != NULL);
			assert(other_statedata != NULL);
			cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, iN, iM, iN, &one,
					overlapmatrix, iN, statedata, iM, &zero, other_statedata, iM);
			switch_state_arrays();
			break;
	}
	lincomb_time += lincomb_timer.stop();
	ortho_time += ortho_timer.stop();
}

コード例 #13

ファイルを表示

ファイル: batchf_zgemv.c プロジェクト: sdrelton/bblas_api_test

void batchf_zgemv(
	const enum BBLAS_TRANS trans,
	const int m, const int n,
	const BBLAS_Complex64_t alpha,
	const BBLAS_Complex64_t **arrayA, const int lda,
	const BBLAS_Complex64_t **arrayx, const int incx,
	const BBLAS_Complex64_t beta,
	BBLAS_Complex64_t **arrayy, const int incy,
	const int batch_count, int info)
{
	/* Local variables */
	int first_index = 0;
	int batch_iter = 0;
	char func_name[15] = "batchf_zgemv";

	/* Check input arguments */
	if (batch_count < 0)
	{
		xerbla_batch(func_name, BBLAS_ERR_BATCH_COUNT, -1);
	}

	if ((trans != BblasTrans) &&
		(trans != BblasNoTrans) &&
		(trans != BblasConjTrans))
	{
		xerbla_batch(func_name, BBLAS_ERR_TRANS, first_index);
		info = BBLAS_ERR_TRANS;
	}

	if (m < 0)
	{
		xerbla_batch(func_name, BBLAS_ERR_M, first_index);
		info = BBLAS_ERR_M;
	}

	if (n < 0)
	{
		xerbla_batch(func_name, BBLAS_ERR_N, first_index);
		info = BBLAS_ERR_N;
	}

	/* Column major */
	if ((lda < 1) && (lda < m))
	{
		xerbla_batch(func_name, BBLAS_ERR_LDA, first_index);
		info = BBLAS_ERR_LDA;
	}

	if (incx < 1)
	{
		xerbla_batch(func_name, BBLAS_ERR_INCX, first_index);
		info = BBLAS_ERR_INCX;
	}
	if (incy < 1)
	{
		xerbla_batch(func_name, BBLAS_ERR_INCY, first_index);
		info = BBLAS_ERR_INCY;
	}

	/* Call CBLAS */
	for (batch_iter = 0; batch_iter < batch_count; batch_iter++)
	{
		cblas_zgemv(
			BblasColMajor,
			trans,
			m, n,
			CBLAS_SADDR( alpha ),
			arrayA[batch_iter], lda,
			arrayx[batch_iter], incx,
			CBLAS_SADDR( beta ),
			arrayy[batch_iter], incy);
	} /* End fixed size for loop */
	/* Successful */
	info = BBLAS_SUCCESS;
}