Esempio n. 1
0
int check_solution(int M, int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB)
{
    int info_solution;
    float Rnorm, Anorm, Xnorm, Bnorm;
    PLASMA_Complex32_t alpha, beta;
    float *work = (float *)malloc(max(M, N)* sizeof(float));
    float eps;

    eps = LAPACKE_slamch_work('e');

    alpha = 1.0;
    beta  = -1.0;

    Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work);
    Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work);
    Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work);

    cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB);

    if (M >= N) {
       PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*NRHS*sizeof(PLASMA_Complex32_t));
       memset((void*)Residual, 0, M*NRHS*sizeof(PLASMA_Complex32_t));
       cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, M);
       Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work);
       free(Residual);
    }
    else {
       PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*NRHS*sizeof(PLASMA_Complex32_t));
       memset((void*)Residual, 0, N*NRHS*sizeof(PLASMA_Complex32_t));
       cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, N);
       Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work);
       free(Residual);
    }

    printf("============\n");
    printf("Checking the Residual of the solution \n");
    printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps));

    if (isnan(Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps)) || (Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps) > 10.0) ) {
         printf("-- The solution is suspicious ! \n");
         info_solution = 1;
    }
    else {
         printf("-- The solution is CORRECT ! \n");
         info_solution= 0 ;
    }

    free(work);

    return info_solution;
}
Esempio n. 2
0
static int check_solution(int M, int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB, float eps)
{
    int info_solution;
    float Rnorm, Anorm, Xnorm, Bnorm;
    PLASMA_Complex32_t alpha, beta;
    float result;
    float *work = (float *)malloc(max(M, N)* sizeof(float));

    alpha = 1.0;
    beta  = -1.0;

    BLAS_cge_norm( blas_colmajor, blas_inf_norm, M, N,    A1, LDA, &Anorm );
    BLAS_cge_norm( blas_colmajor, blas_inf_norm, M, NRHS, B2, LDB, &Xnorm );
    BLAS_cge_norm( blas_colmajor, blas_inf_norm, N, NRHS, B1, LDB, &Bnorm );

    cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB);

    if (M >= N) {
       PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*NRHS*sizeof(PLASMA_Complex32_t));
       memset((void*)Residual, 0, M*NRHS*sizeof(PLASMA_Complex32_t));
       cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, M);
       BLAS_cge_norm( blas_colmajor, blas_inf_norm, M, NRHS, Residual, M, &Rnorm );
       free(Residual);
    }
    else {
       PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*NRHS*sizeof(PLASMA_Complex32_t));
       memset((void*)Residual, 0, N*NRHS*sizeof(PLASMA_Complex32_t));
       cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, N);
       BLAS_cge_norm( blas_colmajor, blas_inf_norm, N, NRHS, Residual, N, &Rnorm );
       free(Residual);
    }

    if (getenv("PLASMA_TESTING_VERBOSE"))
       printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm );

    result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ;
    printf("============\n");
    printf("Checking the Residual of the solution \n");
    printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);

    if (  isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
        printf("-- The solution is suspicious ! \n");
        info_solution = 1;
     }
    else{
        printf("-- The solution is CORRECT ! \n");
        info_solution = 0;
    }
    free(work);
    return info_solution;
}
Esempio n. 3
0
void CORE_cgemm_quark(Quark *quark)
{
    int transA;
    int transB;
    int m;
    int n;
    int k;
    PLASMA_Complex32_t alpha;
    PLASMA_Complex32_t *A;
    int lda;
    PLASMA_Complex32_t *B;
    int ldb;
    PLASMA_Complex32_t beta;
    PLASMA_Complex32_t *C;
    int ldc;

    quark_unpack_args_13(quark, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
    cblas_cgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        m, n, k,
        CBLAS_SADDR(alpha), A, lda,
        B, ldb,
        CBLAS_SADDR(beta), C, ldc);
}
Esempio n. 4
0
/////////////////////////////////////////////////////////////////////
//         REPLACEMENT FUNCTIONS IN CASE BLAS NOT AVAILABLE        //
//      THESE ARE SLOW - NO ATTEMPTS AT OPTIMISATION WERE MADE     //
//           THESE ARE *NOT* THE GENERAL BLAS FUNCTIONS:           //
//        THEY **ONLY** APPLY TO SQUARE ROW-MAJOR MATRICES!!!      //
/////////////////////////////////////////////////////////////////////
//void phi_gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
//                 const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
//                 const int K, const void *alpha, const void *A,
//                 const int lda, const void *B, const int ldb,
//                 const void *beta, void *C, const int ldc) {
void phi_gemm(const int N, const Complex *alpha, const Complex *A,
              const Complex *B, const Complex *beta, Complex *C) {
#ifndef NOBLAS
    #ifdef SINGLEPRECISION
    cblas_cgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,alpha,A,N,B,N,beta,C,N);
    #else
    cblas_zgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,alpha,A,N,B,N,beta,C,N);
    #endif
#else

    int i,j,n;
       
    //multiply beta:
    //for(i = 0; i < N*N; ++i)
    //    C[i] *= (*beta);
    //cout << "beta= " << *beta << " alpha=" << *alpha << "\n";

    for(i = 0; i < N; ++i) {
        for(j = 0; j < N; ++j) {
            C[i+j*N] *= (*beta);
            for (n = 0; n < N; ++n)
                C[i+j*N] += (*alpha)*(A[i+n*N]*B[j*N+n]);
        }
    } 
#endif

}
Esempio n. 5
0
void CORE_cgemm_p2f1_quark(Quark* quark)
{
    int transA;
    int transB;
    int M;
    int N;
    int K;
    PLASMA_Complex32_t alpha;
    PLASMA_Complex32_t *A;
    int LDA;
    PLASMA_Complex32_t **B;
    int LDB;
    PLASMA_Complex32_t beta;
    PLASMA_Complex32_t *C;
    int LDC;
    void *fake1;
    
    quark_unpack_args_14(quark, transA, transB, M, N, K, alpha, 
                         A, LDA, B, LDB, beta, C, LDC, fake1);
    cblas_cgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        M, N, K,
        CBLAS_SADDR(alpha), A, LDA,
        *B, LDB,
        CBLAS_SADDR(beta), C, LDC);
}
static inline void CORE_cgetrf_reclap_update(const int M, const int column, const int n1, const int n2,
                                             PLASMA_Complex32_t *A, const int LDA, int *IPIV, 
                                             const int thidx, const int thcnt)
{
    static PLASMA_Complex32_t posone =  1.0;
    static PLASMA_Complex32_t negone = -1.0;
    PLASMA_Complex32_t *Atop  = A    + column*LDA;
    PLASMA_Complex32_t *Atop2 = Atop + n1    *LDA;
    int coff, ccnt, lm, loff;

    CORE_cbarrier_thread( thidx, thcnt );
    
    psplit( n2, thidx, thcnt, &coff, &ccnt );

    if (ccnt > 0) {
        CORE_claswap1( ccnt, Atop2 + coff*LDA, LDA, column, n1 + column, IPIV ); /* swap to the right */
        
        cblas_ctrsm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                     n1, ccnt, CBLAS_SADDR(posone), Atop + column, LDA, Atop2 + coff*LDA + column, LDA );
    }
    
    /* __sync_synchronize(); */ /* hopefully we will not need memory fences */
    
    /* need to wait for pivoting and triangular solve to finish */
    CORE_cbarrier_thread( thidx, thcnt );
    
    psplit( M, thidx, thcnt, &loff, &lm );
    if (thidx == 0) {
        loff = column + n1;
        lm  -= column + n1;
    };
    
    cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, lm, n2, n1,
                 CBLAS_SADDR(negone), Atop+loff, LDA, Atop2 + column, LDA, CBLAS_SADDR(posone), Atop2+loff, LDA );
}
Esempio n. 7
0
inline void gemm( const Order order, const TransA transa, const TransB transb,
        const int m, const int n, const int k,
        const std::complex<float> alpha, const std::complex<float>* a,
        const int lda, const std::complex<float>* b, const int ldb,
        const std::complex<float> beta, std::complex<float>* c,
        const int ldc ) {
    cblas_cgemm( cblas_option< Order >::value, cblas_option< TransA >::value,
            cblas_option< TransB >::value, m, n, k, &alpha, a, lda, b, ldb,
            &beta, c, ldc );
}
Esempio n. 8
0
VrArrayPtrCF32 BlasComplexSingle::mmult(CBLAS_ORDER order, CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, VrArrayPtrCF32 A, VrArrayPtrCF32 B,float complex  alpha,float complex beta) {
	int dims[2];
	dims[0]=VR_GET_DIMS_CF32(A)[0];
	dims[1]=VR_GET_DIMS_CF32(B)[1];
	VrArrayPtrCF32 C=vrAllocArrayF32CM(2,0,(int*)dims);
	//float alph[]={1,0};
	//float bet[]={0,0};
        cblas_cgemm(order, transA, transB,VR_GET_DIMS_CF32(A)[0],VR_GET_DIMS_CF32(B)[1],VR_GET_DIMS_CF32(A)[1],reinterpret_cast<float*>(&alpha),(float*)VR_GET_DATA_CF32(A),VR_GET_DIMS_CF32(A)[0],
        (float*)VR_GET_DATA_CF32(B),VR_GET_DIMS_CF32(B)[0],reinterpret_cast<float*>(&beta),(float*)VR_GET_DATA_CF32(C),VR_GET_DIMS_CF32(C)[0]);
	return C;
}
Esempio n. 9
0
void CORE_cgemm(int transA, int transB,
                int M, int N, int K,
                PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
                PLASMA_Complex32_t *B, int LDB,
                PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int LDC)
{
    cblas_cgemm(
        CblasColMajor,
        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
        M, N, K,
        CBLAS_SADDR(alpha), A, LDA,
        B, LDB,
        CBLAS_SADDR(beta), C, LDC);
}
Esempio n. 10
0
void blas_cgemm(char transa, char transb, long M, long N,  long K, const complex float alpha, long lda, const complex float A[K][lda], long ldb, const complex float B[N][ldb], const complex float beta, long ldc, complex float C[N][ldc])
{
#ifdef USE_CUDA
#define CUCOMPLEX(x) (((union { cuComplex cu; complex float std; }){ .std = (x) }).cu)
        if (cuda_ondevice(A)) {

                cublasCgemm(transa, transb, M, N, K,  CUCOMPLEX(alpha),
                                (const cuComplex*)A, lda,
                                (const cuComplex*)B, ldb, CUCOMPLEX(beta),
                                (cuComplex*)C, ldc);
        } else
#endif
        cblas_cgemm(CblasColMajor, transa, transb, M, N, K, (void*)&alpha, (void*)A, lda, (void*)B, ldb, (void*)&beta, (void*)C, ldc);
}
Esempio n. 11
0
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_cgemm 
(JNIEnv * env, jobject calling_obj, jint order, jint transA, jint transB, jint M, jint N, jint K, 
jfloatArray jAlpha, jfloatArray jA, jint lda, jfloatArray jB, jint ldb, jfloatArray jBeta, jfloatArray jC, jint ldc){
	jfloat * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE);
	jfloat * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE);
	jfloat * C = (*env)->GetPrimitiveArrayCritical(env, jC, JNI_FALSE);
	jfloat * alpha = (*env)->GetPrimitiveArrayCritical(env, jAlpha, JNI_FALSE);
	jfloat * beta = (*env)->GetPrimitiveArrayCritical(env, jBeta, JNI_FALSE);

	cblas_cgemm((CBLAS_ORDER)order, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, 
                    alpha, A, lda, B, ldb, beta, C, ldc);

	(*env)->ReleasePrimitiveArrayCritical(env, jC, C, 0);
	(*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0);
	(*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0);
}
Esempio n. 12
0
 inline
 void gemm (CBLAS_ORDER const Order,
            CBLAS_TRANSPOSE const TransA, CBLAS_TRANSPOSE const TransB,
            int const M, int const N, int const K,
            traits::complex_f const& alpha,
            traits::complex_f const* A, int const lda,
            traits::complex_f const* B, int const ldb,
            traits::complex_f const& beta,
            traits::complex_f* C, int const ldc)
 {
   cblas_cgemm (Order, TransA, TransB, M, N, K,
                static_cast<void const*> (&alpha),
                static_cast<void const*> (A), lda,
                static_cast<void const*> (B), ldb,
                static_cast<void const*> (&beta),
                static_cast<void*> (C), ldc);
 }
Esempio n. 13
0
inline void gemm(CBLAS_ORDER const Order,
	CBLAS_TRANSPOSE TransA, CBLAS_TRANSPOSE TransB,
	int M, int N, int K,
	float alpha,
	std::complex<float> const *A, int lda,
	std::complex<float> const *B, int ldb,
	float beta,
	std::complex<float>* C, int ldc
) {
	std::complex<float> alphaArg(alpha,0);
	std::complex<float> betaArg(beta,0);
	cblas_cgemm(
		Order, TransA, TransB, M, N, K,
		static_cast<void const *>(&alphaArg),
		static_cast<void const *>(A), lda,
		static_cast<void const *>(B), ldb,
		static_cast<void const *>(&betaArg),
		static_cast<void *>(C), ldc
	);
}
Esempio n. 14
0
void bl1_cgemm_blas( trans1_t transa, trans1_t transb, int m, int n, int k, scomplex* alpha, scomplex* a, int lda, scomplex* b, int ldb, scomplex* beta, scomplex* c, int ldc )
{
#ifdef BLIS1_ENABLE_CBLAS_INTERFACES
	enum CBLAS_ORDER     cblas_order = CblasColMajor;
	enum CBLAS_TRANSPOSE cblas_transa;
	enum CBLAS_TRANSPOSE cblas_transb;

	bl1_param_map_to_netlib_trans( transa, &cblas_transa );
	bl1_param_map_to_netlib_trans( transb, &cblas_transb );

	cblas_cgemm( cblas_order,
	             cblas_transa,
	             cblas_transb,
	             m,
	             n,
	             k,
	             alpha,
	             a, lda,
	             b, ldb,
	             beta,
	             c, ldc );
#else
	char blas_transa;
	char blas_transb;

	bl1_param_map_to_netlib_trans( transa, &blas_transa );
	bl1_param_map_to_netlib_trans( transb, &blas_transb );

	F77_cgemm( &blas_transa,
	           &blas_transb,
	           &m,
	           &n,
	           &k,
	           alpha,
	           a, &lda,
	           b, &ldb,
	           beta,
	           c, &ldc );
#endif
}
Esempio n. 15
0
int check_solution(int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB )
{
    int info_solution;
    float Rnorm, Anorm, Xnorm, Bnorm;
    PLASMA_Complex32_t alpha, beta;
    float *work = (float *)malloc(N*sizeof(float));
    float eps;

    eps = LAPACKE_slamch_work('e');

    /* Initialize A1 and A2 for Symmetric Positive Matrix */
    alpha = 1.0;
    beta  = -1.0;

    Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work);
    Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work);
    Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work);

    cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB);
    Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work);

    printf("============\n");
    printf("Checking the Residual of the solution \n");
    printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps));

    if (Rnorm/((Anorm*Xnorm+Bnorm)*N*eps) > 10.0){
        printf("-- The solution is suspicious ! \n");
        info_solution = 1;
     }
    else{
        printf("-- The solution is CORRECT ! \n");
        info_solution = 0;
    }

    free(work);

    return info_solution;
}
Esempio n. 16
0
unsigned long long benchmark_gemm(
	const void* memory, size_t cache_size,
	enum gemm_type type,
	size_t m, size_t n, size_t k,
	const float a[], const float b[], float c[],
	size_t max_iterations)
{
	unsigned long long computation_time[max_iterations];
	size_t computation_samples = 0;
	for (size_t iteration = 0; iteration < max_iterations; iteration++) {
		read_memory(memory, cache_size);

		unsigned long long start_time, end_time;
		if (!read_timer(&start_time))
			continue;

		switch (type) {
			case gemm_type_sgemm:
			{
#if defined(USE_MKL) || defined(USE_OPENBLAS)
				cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
					m, n, k,
					1.0f, a, k, b, k, 0.0f, c, n);
#elif defined(USE_BLIS)
				float alpha = 1.0f;
				float beta = 0.0f;
				bli_sgemm(BLIS_NO_TRANSPOSE, BLIS_TRANSPOSE,
					m, n, k,
					&alpha,
					(float*) a, k, 1,
					(float*) b, k, 1,
					&beta,
					c, n, 1);
#endif
				break;
			}
			case gemm_type_cgemm:
			{
#if defined(USE_MKL) || defined(USE_OPENBLAS)
				float alpha[2] = { 1.0f, 0.0f };
				float beta[2] = { 0.0f, 0.0f };
				cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
					m, n, k,
					alpha, a, k, b, k, beta, c, n);
#elif defined(USE_BLIS)
				scomplex alpha = { 1.0f, 0.0f };
				scomplex beta = { 0.0f, 0.0f };
				bli_cgemm(BLIS_NO_TRANSPOSE, BLIS_TRANSPOSE,
					m, n, k,
					&alpha,
					(scomplex*) a, k, 1,
					(scomplex*) b, k, 1,
					&beta,
					(scomplex*) c, n, 1);
#endif
				break;
			}
		}

		if (!read_timer(&end_time))
			continue;

		computation_time[computation_samples++] = end_time - start_time;
	}

	return median(computation_time, computation_samples);
}
Esempio n. 17
0
    DLLEXPORT void c_matrix_multiply(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, const blasint m, const blasint n, const blasint k, const openblas_complex_float alpha, const openblas_complex_float x[], const openblas_complex_float y[], const openblas_complex_float beta, openblas_complex_float c[]) {
        blasint lda = transA == CblasNoTrans ? m : k;
        blasint ldb = transB == CblasNoTrans ? k : n;

        cblas_cgemm(CblasColMajor, transA, transB, m, n, k, (float*)&alpha, (float*)x, lda, (float*)y, ldb, (float*)&beta, (float*)c, m);
    }
Esempio n. 18
0
template <typename fptype> static inline int
lapack_gemm_c(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha,
            const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int a_m, int a_n, int d_n, int flags)
{
    int ldsrc1 = src1_step / sizeof(std::complex<fptype>);
    int ldsrc2 = src2_step / sizeof(std::complex<fptype>);
    int ldsrc3 = src3_step / sizeof(std::complex<fptype>);
    int lddst = dst_step / sizeof(std::complex<fptype>);
    int c_m, c_n, d_m;
    CBLAS_TRANSPOSE transA, transB;
    std::complex<fptype> cAlpha(alpha, 0.0);
    std::complex<fptype> cBeta(beta, 0.0);

    if(flags & CV_HAL_GEMM_2_T)
    {
        transB = CblasTrans;
        if(flags & CV_HAL_GEMM_1_T )
        {
            d_m = a_n;
        }
        else
        {
            d_m = a_m;
        }
    }
    else
    {
        transB = CblasNoTrans;
        if(flags & CV_HAL_GEMM_1_T )
        {
            d_m = a_n;
        }
        else
        {
            d_m = a_m;
        }
    }

    if(flags & CV_HAL_GEMM_3_T)
    {
        c_m = d_n;
        c_n = d_m;
    }
    else
    {
        c_m = d_m;
        c_n = d_n;
    }

    if(flags & CV_HAL_GEMM_1_T )
    {
        transA = CblasTrans;
        std::swap(a_n, a_m);
    }
    else
    {
        transA = CblasNoTrans;
    }

    if(src3 != dst && beta != 0.0 && src3_step != 0) {
        if(flags & CV_HAL_GEMM_3_T)
            transpose((std::complex<fptype>*)src3, ldsrc3, (std::complex<fptype>*)dst, lddst, c_m, c_n);
        else
            copy_matrix((std::complex<fptype>*)src3, ldsrc3, (std::complex<fptype>*)dst, lddst, c_m, c_n);
    }
    else if (src3 == dst && (flags & CV_HAL_GEMM_3_T)) //actually transposing C in this case done by openCV
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
    else if(src3_step == 0 && beta != 0.0)
        set_value((std::complex<fptype>*)dst, lddst, std::complex<fptype>(0.0, 0.0), d_m, d_n);

    if(typeid(fptype) == typeid(float))
        cblas_cgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, &cAlpha, (void*)src1, ldsrc1, (void*)src2, ldsrc2, &cBeta, (void*)dst, lddst);
    else if(typeid(fptype) == typeid(double))
        cblas_zgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, &cAlpha, (void*)src1, ldsrc1, (void*)src2, ldsrc2, &cBeta, (void*)dst, lddst);

    return CV_HAL_ERROR_OK;
}
Esempio n. 19
0
int CORE_cgessm(int M, int N, int K, int IB,
                int *IPIV,
                PLASMA_Complex32_t *L, int LDL,
                PLASMA_Complex32_t *A, int LDA)
{
    static PLASMA_Complex32_t zone  =  1.0;
    static PLASMA_Complex32_t mzone = -1.0;
    static int                ione  =  1;

    int i, sb;
    int tmp, tmp2;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (K < 0) {
        coreblas_error(3, "Illegal value of K");
        return -3;
    }
    if (IB < 0) {
        coreblas_error(4, "Illegal value of IB");
        return -4;
    }
    if ((LDL < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDL");
        return -7;
    }
    if ((LDA < max(1,M)) && (M > 0)) {
        coreblas_error(9, "Illegal value of LDA");
        return -9;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (K == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    for(i = 0; i < K; i += IB) {
        sb = min(IB, K-i);
        /*
         * Apply interchanges to columns I*IB+1:IB*( I+1 )+1.
         */
        tmp  = i+1;
        tmp2 = i+sb;
        LAPACKE_claswp_work(LAPACK_COL_MAJOR, N, A, LDA, tmp, tmp2, IPIV, ione);
        /*
         * Compute block row of U.
         */
        cblas_ctrsm(
            CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
            sb, N, CBLAS_SADDR(zone),
            &L[LDL*i+i], LDL,
            &A[i], LDA );

        if (i+sb < M) {
        /*
        * Update trailing submatrix.
        */
        cblas_cgemm(
            CblasColMajor, CblasNoTrans, CblasNoTrans,
            M-(i+sb), N, sb,
            CBLAS_SADDR(mzone), &L[LDL*i+(i+sb)], LDL,
            &A[i], LDA,
            CBLAS_SADDR(zone), &A[i+sb], LDA );
        }
    }
    return PLASMA_SUCCESS;
}
Esempio n. 20
0
void F77_cgemm(int *layout, char *transpa, char *transpb, int *m, int *n, 
     int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
     CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, 
     CBLAS_TEST_COMPLEX *c, int *ldc ) {

  CBLAS_TEST_COMPLEX *A, *B, *C;
  int i,j,LDA, LDB, LDC;
  CBLAS_TRANSPOSE transa, transb;

  get_transpose_type(transpa, &transa);
  get_transpose_type(transpb, &transb);

  if (*layout == TEST_ROW_MJR) {
     if (transa == CblasNoTrans) {
        LDA = *k+1;
        A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
        for( i=0; i<*m; i++ )
           for( j=0; j<*k; j++ ) {
              A[i*LDA+j].real=a[j*(*lda)+i].real;
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
           }
     }
     else {
        LDA = *m+1;
        A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
        for( i=0; i<*k; i++ )
           for( j=0; j<*m; j++ ) {
              A[i*LDA+j].real=a[j*(*lda)+i].real;
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
           }
     }

     if (transb == CblasNoTrans) {
        LDB = *n+1;
        B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
        for( i=0; i<*k; i++ )
           for( j=0; j<*n; j++ ) {
              B[i*LDB+j].real=b[j*(*ldb)+i].real;
              B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
           }
     }
     else {
        LDB = *k+1;
        B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
        for( i=0; i<*n; i++ )
           for( j=0; j<*k; j++ ) {
              B[i*LDB+j].real=b[j*(*ldb)+i].real;
              B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
           }
     }

     LDC = *n+1;
     C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
     for( j=0; j<*n; j++ )
        for( i=0; i<*m; i++ ) {
           C[i*LDC+j].real=c[j*(*ldc)+i].real;
           C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
        }
     cblas_cgemm( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
                  B, LDB, beta, C, LDC );
     for( j=0; j<*n; j++ )
        for( i=0; i<*m; i++ ) {
           c[j*(*ldc)+i].real=C[i*LDC+j].real;
           c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
        }
     free(A);
     free(B);
     free(C);
  }
  else if (*layout == TEST_COL_MJR)
     cblas_cgemm( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
                  b, *ldb, beta, c, *ldc );
  else
     cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
                  b, *ldb, beta, c, *ldc );
}
Esempio n. 21
0
static int check_factorization(int M, int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, PLASMA_Complex32_t *Q, float eps )
{
    float Anorm, Rnorm;
    PLASMA_Complex32_t alpha, beta;
    int info_factorization;
    int i,j;

    PLASMA_Complex32_t *Ql       = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t));
    PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t));
    float *work              = (float *)malloc(max(M,N)*sizeof(float));

    alpha=1.0;
    beta=0.0;

    if (M >= N) {
        /* Extract the R */
        PLASMA_Complex32_t *R = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
        memset((void*)R, 0, N*N*sizeof(PLASMA_Complex32_t));
        LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N);

        /* Perform Ql=Q*R */
        memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex32_t));
        cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M);
        free(R);
    }
    else {
        /* Extract the L */
        PLASMA_Complex32_t *L = (PLASMA_Complex32_t *)malloc(M*M*sizeof(PLASMA_Complex32_t));
        memset((void*)L, 0, M*M*sizeof(PLASMA_Complex32_t));
        LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M);

    /* Perform Ql=LQ */
        memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex32_t));
        cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M);
        free(L);
    }

    /* Compute the Residual */
    for (i = 0; i < M; i++)
        for (j = 0 ; j < N; j++)
            Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];

    BLAS_cge_norm( blas_colmajor, blas_inf_norm, M, N, Residual, M, &Rnorm );
    BLAS_cge_norm( blas_colmajor, blas_inf_norm, M, N, A2, LDA, &Anorm );

    if (M >= N) {
        printf("============\n");
        printf("Checking the QR Factorization \n");
        printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
    }
    else {
        printf("============\n");
        printf("Checking the LQ Factorization \n");
        printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
    }

    if (isnan(Rnorm / (Anorm * N *eps)) || isinf(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 60.0) ) {
        printf("-- Factorization is suspicious ! \n");
        info_factorization = 1;
    }
    else {
        printf("-- Factorization is CORRECT ! \n");
        info_factorization = 0;
    }

    free(work); free(Ql); free(Residual);

    return info_factorization;
}
Esempio n. 22
0
 void wrapper_cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB,
                          const int M, const int N, const int K, const void *alpha,
                          const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
   {
              cblas_cgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   }