void F77_cgemv(int *order, char *transp, int *m, int *n, const void *alpha, CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx, const void *beta, void *y, int *incy) { CBLAS_TEST_COMPLEX *A; int i,j,LDA; enum CBLAS_TRANSPOSE trans; get_transpose_type(transp, &trans); if (*order == TEST_ROW_MJR) { LDA = *n+1; A = (CBLAS_TEST_COMPLEX *)malloc( (*m)*LDA*sizeof( CBLAS_TEST_COMPLEX) ); for( i=0; i<*m; i++ ) for( j=0; j<*n; j++ ){ A[ LDA*i+j ].real=a[ (*lda)*j+i ].real; A[ LDA*i+j ].imag=a[ (*lda)*j+i ].imag; } cblas_cgemv( CblasRowMajor, trans, *m, *n, alpha, A, LDA, x, *incx, beta, y, *incy ); free(A); } else if (*order == TEST_COL_MJR) cblas_cgemv( CblasColMajor, trans, *m, *n, alpha, a, *lda, x, *incx, beta, y, *incy ); else cblas_cgemv( UNDEFINED, trans, *m, *n, alpha, a, *lda, x, *incx, beta, y, *incy ); }
inline void gemv( const Order, const Trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* a, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy ) { cblas_cgemv( cblas_option< Order >::value, cblas_option< Trans >::value, m, n, &alpha, a, lda, x, incx, &beta, y, incy ); }
VrArrayPtrCF32 BlasComplexSingle::vec_mult(CBLAS_ORDER order, CBLAS_TRANSPOSE transA, VrArrayPtrCF32 A, VrArrayPtrCF32 X, float complex alpha, int incX,int incY,float complex beta) { int dims[2]; dims[0]=VR_GET_DIMS_CF32(A)[0]; dims[1]=1; VrArrayPtrCF32 Y=vrAllocArrayF32CM(2,0,(int*)dims); const float alph[] = {1,0}; const float bet[] = {0,0}; cblas_cgemv(order, transA , VR_GET_DIMS_CF32(A)[0],VR_GET_DIMS_CF32(A)[1],(alph), (float*)VR_GET_DATA_CF32(A),VR_GET_DIMS_CF32(A)[0],(float*)VR_GET_DATA_CF32(X), incX,bet,(float*)VR_GET_DATA_CF32(Y),incY ); return Y; }
void phi_hemv(const int N, const Complex *alpha, const Complex *A, const Complex *X, const Complex *beta, Complex *Y){ #ifndef NOBLAS #ifdef SINGLEPRECISION cblas_cgemv(CblasColMajor,CblasNoTrans,N,N,alpha,A,N,X,1,beta,Y,1); #else cblas_zgemv(CblasColMajor,CblasNoTrans,N,N,alpha,A,N,X,1,beta,Y,1); #endif #else phi_gemv(N,alpha,A,X,beta,Y); #endif }
inline void gemv (CBLAS_ORDER const Order, CBLAS_TRANSPOSE const TransA, int const M, int const N, traits::complex_f const& alpha, traits::complex_f const* A, int const lda, traits::complex_f const* X, int const incX, traits::complex_f const& beta, traits::complex_f* Y, int const incY) { cblas_cgemv (Order, TransA, M, N, static_cast<void const*> (&alpha), static_cast<void const*> (A), lda, static_cast<void const*> (X), incX, static_cast<void const*> (&beta), static_cast<void*> (Y), incY); }
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_cgemv (JNIEnv * env, jobject calling_obj, jint order, jint transA, jint M, jint N, jfloatArray jAlpha, jfloatArray jA, jint lda, jfloatArray jX, jint incX, jfloatArray jBeta, jfloatArray jY, jint incY){ jfloat * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); jfloat * alpha = (*env)->GetPrimitiveArrayCritical(env, jAlpha, JNI_FALSE); jfloat * beta = (*env)->GetPrimitiveArrayCritical(env, jBeta, JNI_FALSE); cblas_cgemv((CBLAS_ORDER)order, (CBLAS_TRANSPOSE)transA, M, N, alpha, A, lda, X, incX, beta, Y, incY); (*env)->ReleasePrimitiveArrayCritical(env, jBeta, beta, 0); (*env)->ReleasePrimitiveArrayCritical(env, jAlpha, alpha, 0); (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); }
inline void gemv(CBLAS_ORDER const Order, CBLAS_TRANSPOSE const TransA, int const M, int const N, double alpha, std::complex<float> const *A, int const lda, std::complex<float> const *X, int const incX, double beta, std::complex<float> *Y, int const incY ) { std::complex<float> alphaArg(alpha,0); std::complex<float> betaArg(beta,0); cblas_cgemv(Order, TransA, M, N, reinterpret_cast<cblas_float_complex_type const *>(&alphaArg), reinterpret_cast<cblas_float_complex_type const *>(A), lda, reinterpret_cast<cblas_float_complex_type const *>(X), incX, reinterpret_cast<cblas_float_complex_type const *>(&betaArg), reinterpret_cast<cblas_float_complex_type *>(Y), incY); }
void phi_gemv(const int N, const Complex *alpha, const Complex *A, const Complex *X, const Complex *beta, Complex *Y){ #ifndef NOBLAS #ifdef SINGLEPRECISION cblas_cgemv(CblasColMajor,CblasNoTrans,N,N,alpha,A,N,X,1,beta,Y,1); #else cblas_zgemv(CblasColMajor,CblasNoTrans,N,N,alpha,A,N,X,1,beta,Y,1); #endif #else int i,n; //multiply beta: for(i = 0; i < N; ++i) Y[i] *= (*beta); for(i = 0; i < N; ++i) { for (n = 0; n < N; ++n) Y[i] += (*alpha)*A[n*N+i]*X[n]; } #endif }
void wrapper_cblas_cgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY) { cblas_cgemv(Order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
int CORE_ctsqrt(int M, int N, int IB, PLASMA_Complex32_t *A1, int LDA1, PLASMA_Complex32_t *A2, int LDA2, PLASMA_Complex32_t *T, int LDT, PLASMA_Complex32_t *TAU, PLASMA_Complex32_t *WORK) { static PLASMA_Complex32_t zone = 1.0; static PLASMA_Complex32_t zzero = 0.0; PLASMA_Complex32_t alpha; int i, ii, sb; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (IB < 0) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDA2 < max(1,M)) && (M > 0)) { coreblas_error(8, "Illegal value of LDA2"); return -8; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; for(ii = 0; ii < N; ii += IB) { sb = min(N-ii, IB); for(i = 0; i < sb; i++) { /* * Generate elementary reflector H( II*IB+I ) to annihilate * A( II*IB+I:M, II*IB+I ) */ LAPACKE_clarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]); if (ii+i+1 < N) { /* * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left */ alpha = -conjf(TAU[ii+i]); cblas_ccopy( sb-i-1, &A1[LDA1*(ii+i+1)+(ii+i)], LDA1, WORK, 1); #ifdef COMPLEX LAPACKE_clacgv_work(sb-i-1, WORK, 1); #endif cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, sb-i-1, CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zone), WORK, 1); #ifdef COMPLEX LAPACKE_clacgv_work(sb-i-1, WORK, 1 ); #endif cblas_caxpy( sb-i-1, CBLAS_SADDR(alpha), WORK, 1, &A1[LDA1*(ii+i+1)+ii+i], LDA1); #ifdef COMPLEX LAPACKE_clacgv_work(sb-i-1, WORK, 1 ); #endif cblas_cgerc( CblasColMajor, M, sb-i-1, CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1, WORK, 1, &A2[LDA2*(ii+i+1)], LDA2); } /* * Calculate T */ alpha = -TAU[ii+i]; cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, i, CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zzero), &T[LDT*(ii+i)], 1); cblas_ctrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &T[LDT*ii], LDT, &T[LDT*(ii+i)], 1); T[LDT*(ii+i)+i] = TAU[ii+i]; } if (N > ii+sb) { CORE_ctsmqr( PlasmaLeft, PlasmaConjTrans, sb, N-(ii+sb), M, N-(ii+sb), IB, IB, &A1[LDA1*(ii+sb)+ii], LDA1, &A2[LDA2*(ii+sb)], LDA2, &A2[LDA2*ii], LDA2, &T[LDT*ii], LDT, WORK, sb); } } return PLASMA_SUCCESS; }
void test_gemv (void) { const double flteps = 1e-4, dbleps = 1e-6; { int order = 101; int trans = 111; int M = 1; int N = 1; int lda = 1; float alpha = 1.0f; float beta = -0.3f; float A[] = { -0.805f }; float X[] = { -0.965f }; int incX = -1; float Y[] = { 0.537f }; int incY = -1; float y_expected[] = { 0.615725f }; cblas_sgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[i], y_expected[i], flteps, "sgemv(case 774)"); } }; }; { int order = 102; int trans = 111; int M = 1; int N = 1; int lda = 1; float alpha = 1.0f; float beta = -0.3f; float A[] = { -0.805f }; float X[] = { -0.965f }; int incX = -1; float Y[] = { 0.537f }; int incY = -1; float y_expected[] = { 0.615725f }; cblas_sgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[i], y_expected[i], flteps, "sgemv(case 775)"); } }; }; { int order = 101; int trans = 112; int M = 1; int N = 1; int lda = 1; float alpha = 1.0f; float beta = 0.0f; float A[] = { -0.805f }; float X[] = { -0.965f }; int incX = -1; float Y[] = { 0.537f }; int incY = -1; float y_expected[] = { 0.776825f }; cblas_sgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[i], y_expected[i], flteps, "sgemv(case 776)"); } }; }; { int order = 102; int trans = 112; int M = 1; int N = 1; int lda = 1; float alpha = 1.0f; float beta = 0.0f; float A[] = { -0.805f }; float X[] = { -0.965f }; int incX = -1; float Y[] = { 0.537f }; int incY = -1; float y_expected[] = { 0.776825f }; cblas_sgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[i], y_expected[i], flteps, "sgemv(case 777)"); } }; }; { int order = 101; int trans = 111; int M = 1; int N = 1; int lda = 1; double alpha = -0.3; double beta = -1; double A[] = { -0.047 }; double X[] = { 0.672 }; int incX = -1; double Y[] = { 0.554 }; int incY = -1; double y_expected[] = { -0.5445248 }; cblas_dgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[i], y_expected[i], dbleps, "dgemv(case 778)"); } }; }; { int order = 102; int trans = 111; int M = 1; int N = 1; int lda = 1; double alpha = -0.3; double beta = -1; double A[] = { -0.047 }; double X[] = { 0.672 }; int incX = -1; double Y[] = { 0.554 }; int incY = -1; double y_expected[] = { -0.5445248 }; cblas_dgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[i], y_expected[i], dbleps, "dgemv(case 779)"); } }; }; { int order = 101; int trans = 112; int M = 1; int N = 1; int lda = 1; double alpha = -1; double beta = 1; double A[] = { -0.047 }; double X[] = { 0.672 }; int incX = -1; double Y[] = { 0.554 }; int incY = -1; double y_expected[] = { 0.585584 }; cblas_dgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[i], y_expected[i], dbleps, "dgemv(case 780)"); } }; }; { int order = 102; int trans = 112; int M = 1; int N = 1; int lda = 1; double alpha = -1; double beta = 1; double A[] = { -0.047 }; double X[] = { 0.672 }; int incX = -1; double Y[] = { 0.554 }; int incY = -1; double y_expected[] = { 0.585584 }; cblas_dgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[i], y_expected[i], dbleps, "dgemv(case 781)"); } }; }; { int order = 101; int trans = 111; int M = 1; int N = 1; int lda = 1; float alpha[2] = {0.0f, 0.1f}; float beta[2] = {0.0f, 1.0f}; float A[] = { 0.629f, 0.801f }; float X[] = { 0.778f, -0.073f }; int incX = -1; float Y[] = { -0.976f, -0.682f }; int incY = -1; float y_expected[] = { 0.624274f, -0.921216f }; cblas_cgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 782) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 782) imag"); }; }; }; { int order = 102; int trans = 111; int M = 1; int N = 1; int lda = 1; float alpha[2] = {0.0f, 0.1f}; float beta[2] = {0.0f, 1.0f}; float A[] = { 0.629f, 0.801f }; float X[] = { 0.778f, -0.073f }; int incX = -1; float Y[] = { -0.976f, -0.682f }; int incY = -1; float y_expected[] = { 0.624274f, -0.921216f }; cblas_cgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 783) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 783) imag"); }; }; }; { int order = 101; int trans = 112; int M = 1; int N = 1; int lda = 1; float alpha[2] = {0.0f, 1.0f}; float beta[2] = {-0.3f, 0.1f}; float A[] = { 0.629f, 0.801f }; float X[] = { 0.778f, -0.073f }; int incX = -1; float Y[] = { -0.976f, -0.682f }; int incY = -1; float y_expected[] = { -0.216261f, 0.654835f }; cblas_cgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 784) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 784) imag"); }; }; }; { int order = 102; int trans = 112; int M = 1; int N = 1; int lda = 1; float alpha[2] = {0.0f, 1.0f}; float beta[2] = {-0.3f, 0.1f}; float A[] = { 0.629f, 0.801f }; float X[] = { 0.778f, -0.073f }; int incX = -1; float Y[] = { -0.976f, -0.682f }; int incY = -1; float y_expected[] = { -0.216261f, 0.654835f }; cblas_cgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 785) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 785) imag"); }; }; }; { int order = 101; int trans = 113; int M = 1; int N = 1; int lda = 1; float alpha[2] = {0.0f, 0.1f}; float beta[2] = {-0.3f, 0.1f}; float A[] = { 0.629f, 0.801f }; float X[] = { 0.778f, -0.073f }; int incX = -1; float Y[] = { -0.976f, -0.682f }; int incY = -1; float y_expected[] = { 0.427909f, 0.150089f }; cblas_cgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 786) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 786) imag"); }; }; }; { int order = 102; int trans = 113; int M = 1; int N = 1; int lda = 1; float alpha[2] = {0.0f, 0.1f}; float beta[2] = {-0.3f, 0.1f}; float A[] = { 0.629f, 0.801f }; float X[] = { 0.778f, -0.073f }; int incX = -1; float Y[] = { -0.976f, -0.682f }; int incY = -1; float y_expected[] = { 0.427909f, 0.150089f }; cblas_cgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], flteps, "cgemv(case 787) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], flteps, "cgemv(case 787) imag"); }; }; }; { int order = 101; int trans = 111; int M = 1; int N = 1; int lda = 1; double alpha[2] = {0, 0.1}; double beta[2] = {1, 0}; double A[] = { 0.932, -0.724 }; double X[] = { 0.334, -0.317 }; int incX = -1; double Y[] = { 0.348, 0.07 }; int incY = -1; double y_expected[] = { 0.401726, 0.078178 }; cblas_zgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 788) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 788) imag"); }; }; }; { int order = 102; int trans = 111; int M = 1; int N = 1; int lda = 1; double alpha[2] = {0, 0.1}; double beta[2] = {1, 0}; double A[] = { 0.932, -0.724 }; double X[] = { 0.334, -0.317 }; int incX = -1; double Y[] = { 0.348, 0.07 }; int incY = -1; double y_expected[] = { 0.401726, 0.078178 }; cblas_zgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 789) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 789) imag"); }; }; }; { int order = 101; int trans = 112; int M = 1; int N = 1; int lda = 1; double alpha[2] = {-0.3, 0.1}; double beta[2] = {0, 1}; double A[] = { 0.932, -0.724 }; double X[] = { 0.334, -0.317 }; int incX = -1; double Y[] = { 0.348, 0.07 }; int incY = -1; double y_expected[] = { -0.040808, 0.517356 }; cblas_zgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 790) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 790) imag"); }; }; }; { int order = 102; int trans = 112; int M = 1; int N = 1; int lda = 1; double alpha[2] = {-0.3, 0.1}; double beta[2] = {0, 1}; double A[] = { 0.932, -0.724 }; double X[] = { 0.334, -0.317 }; int incX = -1; double Y[] = { 0.348, 0.07 }; int incY = -1; double y_expected[] = { -0.040808, 0.517356 }; cblas_zgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 791) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 791) imag"); }; }; }; { int order = 101; int trans = 113; int M = 1; int N = 1; int lda = 1; double alpha[2] = {1, 0}; double beta[2] = {0, 0}; double A[] = { 0.932, -0.724 }; double X[] = { 0.334, -0.317 }; int incX = -1; double Y[] = { 0.348, 0.07 }; int incY = -1; double y_expected[] = { 0.540796, -0.053628 }; cblas_zgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 792) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 792) imag"); }; }; }; { int order = 102; int trans = 113; int M = 1; int N = 1; int lda = 1; double alpha[2] = {1, 0}; double beta[2] = {0, 0}; double A[] = { 0.932, -0.724 }; double X[] = { 0.334, -0.317 }; int incX = -1; double Y[] = { 0.348, 0.07 }; int incY = -1; double y_expected[] = { 0.540796, -0.053628 }; cblas_zgemv(order, trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); { int i; for (i = 0; i < 1; i++) { gsl_test_rel(Y[2*i], y_expected[2*i], dbleps, "zgemv(case 793) real"); gsl_test_rel(Y[2*i+1], y_expected[2*i+1], dbleps, "zgemv(case 793) imag"); }; }; }; }
int CORE_cttqrt(int M, int N, int IB, PLASMA_Complex32_t *A1, int LDA1, PLASMA_Complex32_t *A2, int LDA2, PLASMA_Complex32_t *T, int LDT, PLASMA_Complex32_t *TAU, PLASMA_Complex32_t *WORK) { static PLASMA_Complex32_t zone = 1.0; static PLASMA_Complex32_t zzero = 0.0; static int ione = 1; PLASMA_Complex32_t alpha; int i, j, ii, sb, mi, ni; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (IB < 0) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDA2 < max(1,M)) && (M > 0)) { coreblas_error(7, "Illegal value of LDA2"); return -7; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; for(ii = 0; ii < N; ii += IB) { sb = min(N-ii, IB); for(i = 0; i < sb; i++) { /* * Generate elementary reflector H( II*IB+I ) to annihilate * A( II*IB+I:mi, II*IB+I ). */ mi = ii + i + 1; LAPACKE_clarfg_work(mi+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], ione, &TAU[ii+i]); if (sb-i-1>0) { /* * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left. */ ni = sb-i-1; cblas_ccopy( ni, &A1[LDA1*(ii+i+1)+(ii+i)], LDA1, WORK, 1); #ifdef COMPLEX LAPACKE_clacgv_work(ni, WORK, ione); #endif cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, mi, ni, CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zone), WORK, 1); #ifdef COMPLEX LAPACKE_clacgv_work(ni, WORK, ione); #endif alpha = -conjf(TAU[ii+i]); cblas_caxpy( ni, CBLAS_SADDR(alpha), WORK, 1, &A1[LDA1*(ii+i+1)+ii+i], LDA1); #ifdef COMPLEX LAPACKE_clacgv_work(ni, WORK, ione); #endif cblas_cgerc( CblasColMajor, mi, ni, CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1, WORK, 1, &A2[LDA2*(ii+i+1)], LDA2); } /* * Calculate T. */ if (i > 0 ) { cblas_ccopy(i, &A2[LDA2*(ii+i)+ii], 1, &WORK[ii], 1); cblas_ctrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaConjTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &A2[LDA2*ii+ii], LDA2, &WORK[ii], 1); alpha = -(TAU[ii+i]); for(j = 0; j < i; j++) { WORK[ii+j] = alpha * WORK[ii+j]; } if (ii > 0) { cblas_cgemv( CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, ii, i, CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2, &A2[LDA2*(ii+i)], 1, CBLAS_SADDR(zzero), WORK, 1); cblas_caxpy(i, CBLAS_SADDR(zone), &WORK[ii], 1, WORK, 1); } cblas_ccopy(i, WORK, 1, &T[LDT*(ii+i)], 1); cblas_ctrmv( CblasColMajor, (CBLAS_UPLO)PlasmaUpper, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit, i, &T[LDT*ii], LDT, &T[LDT*(ii+i)], 1); } T[LDT*(ii+i)+i] = TAU[ii+i]; } /* Apply Q' to the rest of the matrix to the left */ if (N > ii+sb) { CORE_cttrfb( PlasmaLeft, PlasmaConjTrans, PlasmaForward, PlasmaColumnwise, sb, N-(ii+sb), ii+sb, N-(ii+sb), sb, &A1[LDA1*(ii+sb)+ii], LDA1, &A2[LDA2*(ii+sb)], LDA2, &A2[LDA2*ii], LDA2, &T[LDT*ii], LDT, WORK, sb); } } return PLASMA_SUCCESS; }