/*
 * Test shim that forwards a rank-1 conjugated update to cblas_zgerc.
 * Every argument arrives by pointer.
 *
 * layout == TEST_ROW_MJR:
 *     The caller's matrix `a` is read as a[(*lda)*j + i] for row i, column j.
 *     It is copied into a temporary buffer indexed A[LDA*i + j] with
 *     LDA = *n + 1, cblas_zgerc is called with CblasRowMajor on that buffer,
 *     and the result is copied back into `a` before the buffer is freed.
 * layout == TEST_COL_MJR:
 *     cblas_zgerc is called directly on `a` with CblasColMajor.
 * anything else:
 *     cblas_zgerc is called with the UNDEFINED layout value
 *     (presumably to exercise the library's argument checking -- confirm).
 *
 * If the temporary buffer cannot be allocated while both dimensions are
 * positive, the process is aborted instead of dereferencing a null pointer.
 */
void F77_zgerc(int *layout, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha,
               CBLAS_TEST_ZOMPLEX *x, int *incx, CBLAS_TEST_ZOMPLEX *y, int *incy,
               CBLAS_TEST_ZOMPLEX *a, int *lda)
{
   CBLAS_TEST_ZOMPLEX *A;
   int i, j, LDA;

   if (*layout == TEST_ROW_MJR) {
      LDA = *n + 1;
      /* Widen before multiplying so the byte count is not computed in int. */
      A = (CBLAS_TEST_ZOMPLEX *)malloc((size_t)(*m) * (size_t)LDA *
                                       sizeof(CBLAS_TEST_ZOMPLEX));
      /*
       * A null result only matters when the copy loops below would run;
       * for zero or negative dimensions the buffer is never indexed here
       * and the call is still forwarded as before.
       */
      if (A == NULL && *m > 0 && *n > 0) {
         abort();
      }

      /* Copy `a` into the row-major scratch buffer. */
      for (i = 0; i < *m; i++) {
         for (j = 0; j < *n; j++) {
            A[LDA*i + j].real = a[(*lda)*j + i].real;
            A[LDA*i + j].imag = a[(*lda)*j + i].imag;
         }
      }

      cblas_zgerc(CblasRowMajor, *m, *n, alpha, x, *incx, y, *incy, A, LDA);

      /* Copy the updated values back into the caller's storage. */
      for (i = 0; i < *m; i++) {
         for (j = 0; j < *n; j++) {
            a[(*lda)*j + i].real = A[LDA*i + j].real;
            a[(*lda)*j + i].imag = A[LDA*i + j].imag;
         }
      }
      free(A);
   }
   else if (*layout == TEST_COL_MJR) {
      cblas_zgerc(CblasColMajor, *m, *n, alpha, x, *incx, y, *incy, a, *lda);
   }
   else {
      cblas_zgerc(UNDEFINED, *m, *n, alpha, x, *incx, y, *incy, a, *lda);
   }
}
// Overload of gerc for traits::complex_d operands.
// Erases the element type of alpha, X, Y and A to (const) void pointers
// and forwards every argument, unchanged and in order, to cblas_zgerc.
inline
void gerc (CBLAS_ORDER const Order, int const M, int const N,
           traits::complex_d const& alpha,
           traits::complex_d const* X, int const incX,
           traits::complex_d const* Y, int const incY,
           traits::complex_d* A, int const lda)
{
  void const* const alpha_ptr = static_cast<void const*> (&alpha);
  void const* const x_ptr     = static_cast<void const*> (X);
  void const* const y_ptr     = static_cast<void const*> (Y);
  void* const       a_ptr     = static_cast<void*> (A);

  cblas_zgerc (Order, M, N, alpha_ptr, x_ptr, incX, y_ptr, incY, a_ptr, lda);
}
/*
 * Rank-1 conjugated update of a dense N-by-N matrix stored row by row:
 *
 *     A[i*N + j] += alpha * X[i] * conj(Y[j])      for 0 <= i, j < N
 *
 * With BLAS available (NOBLAS undefined) the work is delegated to
 * cblas_cgerc (SINGLEPRECISION defined) or cblas_zgerc, using CblasRowMajor,
 * unit strides for X and Y, and a leading dimension of N.
 *
 * Without BLAS the same update is done by an explicit double loop. The
 * fallback accumulates into A and conjugates Y so that both build
 * configurations produce the same result. conj() is expected to come from
 * the complex header that already provides the Complex type.
 *
 * N     : matrix order and length of X and Y
 * alpha : pointer to the scalar multiplier
 * X, Y  : input vectors of length N
 * A     : N*N matrix, updated in place
 */
void phi_gerc(const int N, const Complex *alpha, const Complex *X, const Complex *Y, Complex *A){
#ifndef NOBLAS
#ifdef SINGLEPRECISION
    cblas_cgerc(CblasRowMajor,N,N,alpha,X,1,Y,1,A,N);
#else
    cblas_zgerc(CblasRowMajor,N,N,alpha,X,1,Y,1,A,N);
#endif
#else
    int i,j;
    for(i = 0; i < N; ++i){
        /* alpha * X[i] does not depend on j; compute it once per row. */
        const Complex ax = (*alpha)*X[i];
        for(j = 0; j < N; ++j){
            A[i*N+j] += ax*conj(Y[j]);
        }
    }
#endif
}
/*
 * Blocked, row-panel factorization kernel operating on the pair (A1, A2).
 * Per the inline comments it generates elementary reflectors row by row,
 * stores their scalar factors in TAU, and accumulates a block factor in T.
 * (The name suggests an LQ factorization of a triangular-on-triangular
 * tile pair -- NOTE(review): confirm against the PLASMA documentation.)
 *
 * Parameters
 *   M, N   : problem dimensions; the outer loop walks ii over [0, M) in
 *            steps of IB, and at most min(j+1, N) entries of row j of A2
 *            are touched.
 *   IB     : panel height (inner block size).
 *   A1     : indexed as A1[LDA1*col + row]; updated in place.
 *   A2     : indexed as A2[LDA2*col + row]; updated in place.
 *   T      : indexed as T[LDT*col + row]; the first IB-by-N region is
 *            passed to CORE_zlaset with 0., 0. before the factorization.
 *   TAU    : receives one scalar per processed row j.
 *   WORK   : scratch storage; also handed to CORE_zpemv and CORE_zparfb.
 *
 * Returns PLASMA_SUCCESS on completion or on the quick-return path
 * (M, N or IB equal to zero); -1, -2, -3 or -7 when M, N, IB or LDA2
 * respectively fails validation.
 */
int CORE_zttlqt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;
#ifdef COMPLEX
    /* Only needed by the conjugation calls, which exist only in COMPLEX builds. */
    static int ione = 1;
#endif
    PLASMA_Complex64_t alpha;
    int i, j, l, ii, sb, mi, ni;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    /* LDA2 must hold at least M rows; only enforced when M is positive. */
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* TODO: Need to check why some cases require
     *  this to not have uninitialized values */
    CORE_zlaset( PlasmaUpperLower, IB, N,
                 0., 0., T, LDT);

    /* Outer loop: one panel of up to IB rows per iteration. */
    for(ii = 0; ii < M; ii += IB) {
        sb = min(M-ii, IB);          /* rows in this panel */
        for(i = 0; i < sb; i++) {
            j  = ii + i;             /* global row index */
            mi = sb-i-1;             /* rows remaining in the panel below row j */
            ni = min( j + 1, N);     /* number of entries of row j of A2 involved */
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:M ).
             */
#ifdef COMPLEX
            /* Conjugate row j of A2 (stride LDA2) and the diagonal entry of A1
             * in place; both are conjugated again at the end of this iteration. */
            LAPACKE_zlacgv_work(ni, &A2[j], LDA2);
            LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1);
#endif
            LAPACKE_zlarfg_work(ni+1, &A1[LDA1*j+j], &A2[j], LDA2, &TAU[j]);

            if (mi > 0) {
                /*
                 * Apply H( j-1 ) to A( j:II+IB-1, j-1:M ) from the right.
                 */
                /* WORK <- the mi entries of A1 directly below the diagonal in column j. */
                cblas_zcopy(
                    mi,
                    &A1[LDA1*j+(j+1)], 1,
                    WORK, 1);

                /* WORK <- WORK + A2(j+1.., 0..ni-1) * row j of A2. */
                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                    mi, ni,
                    CBLAS_SADDR(zone), &A2[j+1], LDA2,
                    &A2[j], LDA2,
                    CBLAS_SADDR(zone), WORK, 1);

                alpha = -(TAU[j]);
                /* A1 entries below the diagonal in column j += alpha * WORK. */
                cblas_zaxpy(
                    mi, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*j+j+1], 1);

                /* Rank-1 update of the A2 rows below row j, using WORK and row j of A2. */
                cblas_zgerc(
                    CblasColMajor, mi, ni,
                    CBLAS_SADDR(alpha), WORK, 1,
                    &A2[j], LDA2,
                    &A2[j+1], LDA2);
            }

            /*
             * Calculate T.
             */
            if (i > 0 ) {
                l = min(i, max(0, N-ii));
                alpha = -(TAU[j]);
                /* Fills the first i entries of column j of T from the panel rows
                 * of A2 and row j of A2 (exact semantics are CORE_zpemv's). */
                CORE_zpemv(
                        PlasmaNoTrans, PlasmaRowwise,
                        i , min(j, N), l,
                        alpha, &A2[ii], LDA2,
                        &A2[j], LDA2,
                        zzero, &T[LDT*j], 1,
                        WORK);

                /* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */
                cblas_ztrmv(
                        CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                        (CBLAS_TRANSPOSE)PlasmaNoTrans,
                        (CBLAS_DIAG)PlasmaNonUnit,
                        i, &T[LDT*ii], LDT,
                        &T[LDT*j], 1);
            }
#ifdef COMPLEX
            /* Undo the conjugation applied at the top of the iteration. */
            LAPACKE_zlacgv_work(ni, &A2[j], LDA2 );
            LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1 );
#endif
            /* Diagonal entry of this panel's T block is the reflector scalar. */
            T[LDT*j+i] = TAU[j];
        }

        /* Apply Q to the rest of the matrix to the right */
        if (M > ii+sb) {
            mi = M-(ii+sb);                 /* rows below the current panel */
            ni = min(ii+sb, N);
            l  = min(sb, max(0, ni-ii));
            CORE_zparfb(
                PlasmaRight, PlasmaNoTrans,
                PlasmaForward, PlasmaRowwise,
                mi, IB, mi, ni, sb, l,
                &A1[LDA1*ii+ii+sb], LDA1,
                &A2[ii+sb], LDA2,
                &A2[ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, M);
        }
    }
    return PLASMA_SUCCESS;
}
/*
 * Blocked, column-panel factorization kernel operating on the pair (A1, A2).
 * Per the inline comments it generates one elementary reflector per column,
 * stores the scalar factors in TAU, and accumulates a block factor in T.
 * (The name suggests a QR factorization of a triangular-on-triangular
 * tile pair -- NOTE(review): confirm against the PLASMA documentation.)
 *
 * Parameters
 *   M, N   : problem dimensions; the outer loop walks ii over [0, N) in
 *            steps of IB. Only the first ii+i+1 entries of column ii+i of
 *            A2 take part in each reflector.
 *   IB     : panel width (inner block size).
 *   A1     : indexed as A1[LDA1*col + row]; updated in place.
 *   A2     : indexed as A2[LDA2*col + row]; updated in place.
 *   T      : indexed as T[LDT*col + row]; receives the block factor.
 *   TAU    : receives one scalar per processed column.
 *   WORK   : scratch storage; also handed to CORE_zttrfb.
 *
 * Returns PLASMA_SUCCESS on completion or on the quick-return path
 * (M, N or IB equal to zero); -1, -2, -3 or -7 when M, N, IB or LDA2
 * respectively fails validation.
 */
int CORE_zttqrt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;
    static int ione = 1;
    PLASMA_Complex64_t alpha;
    int i, j, ii, sb, mi, ni;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    /* LDA2 must hold at least M rows; only enforced when M is positive. */
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(7, "Illegal value of LDA2");
        return -7;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* Outer loop: one panel of up to IB columns per iteration. */
    for(ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);          /* columns in this panel */
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:mi, II*IB+I ).
             */
            mi = ii + i + 1;         /* entries of A2 column ii+i that participate */
            LAPACKE_zlarfg_work(mi+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], ione, &TAU[ii+i]);

            if (sb-i-1>0) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
                 */
                ni = sb-i-1;         /* columns remaining in the panel */
                /* WORK <- row ii+i of A1, columns ii+i+1 onward (stride LDA1). */
                cblas_zcopy(
                    ni,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);

#ifdef COMPLEX
                /* WORK is conjugated before and after each BLAS call below;
                 * the toggles must stay paired with those calls. */
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                /* WORK <- WORK + A2(0..mi-1, remaining columns)^H * A2 column ii+i. */
                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                    mi, ni,
                    CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    CBLAS_SADDR(zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                alpha = -conj(TAU[ii+i]);
                /* Row ii+i of A1 (remaining columns) += alpha * WORK. */
                cblas_zaxpy(
                    ni, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(ni, WORK, ione);
#endif
                /* Rank-1 update of the remaining A2 columns with column ii+i and WORK. */
                cblas_zgerc(
                    CblasColMajor, mi, ni,
                    CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }

            /*
             * Calculate T.
             */
            if (i > 0 ) {
                /* WORK[ii .. ii+i) <- A2 column ii+i, rows ii .. ii+i-1. */
                cblas_zcopy(i, &A2[LDA2*(ii+i)+ii], 1, &WORK[ii], 1);

                /* Multiply that segment by the conjugate transpose of the
                 * upper-triangular i-by-i block of A2 starting at (ii, ii). */
                cblas_ztrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)PlasmaConjTrans, (CBLAS_DIAG)PlasmaNonUnit,
                    i, &A2[LDA2*ii+ii], LDA2,
                    &WORK[ii], 1);

                alpha = -(TAU[ii+i]);
                for(j = 0; j < i; j++) {
                    WORK[ii+j] = alpha * WORK[ii+j];
                }

                if (ii > 0) {
                    /* Contribution of rows 0 .. ii-1 goes to WORK[0 .. i); it is then
                     * summed with the segment at WORK[ii ..]. The two regions do not
                     * overlap because i < sb <= IB <= ii here. */
                    cblas_zgemv(
                        CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                        ii, i,
                        CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
                        &A2[LDA2*(ii+i)], 1,
                        CBLAS_SADDR(zzero), WORK, 1);

                    cblas_zaxpy(i, CBLAS_SADDR(zone), &WORK[ii], 1, WORK, 1);
                }

                /* When ii == 0 the segment already lives at WORK[0 ..]. */
                cblas_zcopy(i, WORK, 1, &T[LDT*(ii+i)], 1);

                cblas_ztrmv(
                    CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                    (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_DIAG)PlasmaNonUnit,
                    i, &T[LDT*ii], LDT,
                    &T[LDT*(ii+i)], 1);
            }

            /* Diagonal entry of this panel's T block is the reflector scalar. */
            T[LDT*(ii+i)+i] = TAU[ii+i];
        }

        /* Apply Q' to the rest of the matrix to the left */
        if (N > ii+sb) {
            CORE_zttrfb(
                PlasmaLeft, PlasmaConjTrans,
                PlasmaForward, PlasmaColumnwise,
                sb, N-(ii+sb), ii+sb, N-(ii+sb), sb,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}
/*
 * Blocked, column-panel factorization kernel operating on the pair (A1, A2).
 * Per the inline comments it generates one elementary reflector per column,
 * stores the scalar factors in TAU, and accumulates a block factor in T.
 * (The name suggests a QR factorization of a triangular tile stacked on a
 * full tile -- NOTE(review): confirm against the PLASMA documentation.)
 *
 * Parameters
 *   M      : number of rows of A2 used by every reflector and update.
 *   N      : number of columns; the outer loop walks ii over [0, N) in
 *            steps of IB.
 *   IB     : panel width (inner block size).
 *   A1     : indexed as A1[LDA1*col + row]; updated in place.
 *   A2     : indexed as A2[LDA2*col + row]; updated in place.
 *   T      : indexed as T[LDT*col + row]; receives the block factor.
 *   TAU    : receives one scalar per processed column.
 *   WORK   : scratch storage; also handed to CORE_ztsmqr.
 *
 * Returns PLASMA_SUCCESS on completion or on the quick-return path
 * (M, N or IB equal to zero); -1, -2 or -3 when M, N or IB is negative,
 * and -8 when LDA2 fails validation.
 */
int CORE_ztsqrt(int M, int N, int IB,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK)
{
    static PLASMA_Complex64_t zone = 1.0;
    static PLASMA_Complex64_t zzero = 0.0;

    PLASMA_Complex64_t alpha;
    int i, ii, sb;

    /* Check input arguments */
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    /* NOTE(review): LDA2 is the 7th parameter of this function, yet position 8
     * is reported here -- confirm whether that is intentional before changing
     * it, since callers may inspect the return code. */
    if ((LDA2 < max(1,M)) && (M > 0)) {
        coreblas_error(8, "Illegal value of LDA2");
        return -8;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* Outer loop: one panel of up to IB columns per iteration. */
    for(ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);          /* columns in this panel */
        for(i = 0; i < sb; i++) {
            /*
             * Generate elementary reflector H( II*IB+I ) to annihilate
             * A( II*IB+I:M, II*IB+I )
             */
            LAPACKE_zlarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]);

            if (ii+i+1 < N) {
                /*
                 * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left
                 */
                /* The guard tests the global column, but every count below is
                 * sb-i-1; on the last column of a non-final panel that count is 0. */
                alpha = -conj(TAU[ii+i]);
                /* WORK <- row ii+i of A1, remaining panel columns (stride LDA1). */
                cblas_zcopy(
                    sb-i-1,
                    &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
                    WORK, 1);
#ifdef COMPLEX
                /* WORK is conjugated before and after each BLAS call below;
                 * the toggles must stay paired with those calls. */
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1);
#endif
                /* WORK <- WORK + A2(:, remaining columns)^H * A2 column ii+i. */
                cblas_zgemv(
                    CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans,
                    M, sb-i-1,
                    CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
                    &A2[LDA2*(ii+i)], 1,
                    CBLAS_SADDR(zone), WORK, 1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1 );
#endif
                /* Row ii+i of A1 (remaining columns) += alpha * WORK. */
                cblas_zaxpy(
                    sb-i-1, CBLAS_SADDR(alpha),
                    WORK, 1,
                    &A1[LDA1*(ii+i+1)+ii+i], LDA1);
#ifdef COMPLEX
                LAPACKE_zlacgv_work(sb-i-1, WORK, 1 );
#endif
                /* Rank-1 update of the remaining A2 columns with column ii+i and WORK. */
                cblas_zgerc(
                    CblasColMajor, M, sb-i-1,
                    CBLAS_SADDR(alpha), &A2[LDA2*(ii+i)], 1,
                    WORK, 1,
                    &A2[LDA2*(ii+i+1)], LDA2);
            }
            /*
             * Calculate T
             */
            alpha = -TAU[ii+i];
            /* First i entries of T column ii+i <- alpha * A2(:, ii..ii+i-1)^H * A2 column ii+i. */
            cblas_zgemv(
                CblasColMajor, (CBLAS_TRANSPOSE)PlasmaConjTrans, M, i,
                CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
                &A2[LDA2*(ii+i)], 1,
                CBLAS_SADDR(zzero), &T[LDT*(ii+i)], 1);

            /* Multiply those entries by the upper-triangular i-by-i block of T for this panel. */
            cblas_ztrmv(
                CblasColMajor, (CBLAS_UPLO)PlasmaUpper,
                (CBLAS_TRANSPOSE)PlasmaNoTrans,
                (CBLAS_DIAG)PlasmaNonUnit,
                i, &T[LDT*ii], LDT,
                &T[LDT*(ii+i)], 1);

            /* Diagonal entry of this panel's T block is the reflector scalar. */
            T[LDT*(ii+i)+i] = TAU[ii+i];
        }
        /* Update the columns to the right of the panel with the block
         * reflector just built (delegated to CORE_ztsmqr). */
        if (N > ii+sb) {
            CORE_ztsmqr(
                PlasmaLeft, PlasmaConjTrans,
                sb, N-(ii+sb), M, N-(ii+sb), IB, IB,
                &A1[LDA1*(ii+sb)+ii], LDA1,
                &A2[LDA2*(ii+sb)], LDA2,
                &A2[LDA2*ii], LDA2,
                &T[LDT*ii], LDT,
                WORK, sb);
        }
    }
    return PLASMA_SUCCESS;
}
/*
 * Runs the fixed 1x1 test cases 1390-1401 for the rank-1 update routines
 * cblas_sger, cblas_dger, cblas_cgeru, cblas_cgerc, cblas_zgeru and
 * cblas_zgerc.
 *
 * Each case is run with layout value 101 and with layout value 102, with
 * both vector increments set to -1. The updated matrix is compared
 * against the stored expected value with gsl_test_rel, using a relative
 * tolerance of 1e-4 for single precision and 1e-6 for double precision.
 * Complex results are stored as interleaved (real, imag) pairs and each
 * component is checked separately.
 */
void
test_ger (void)
{
  const double flteps = 1e-4;
  const double dbleps = 1e-6;

  /* ---- real single precision: sger ---- */
  {
    float a[] = { -0.515f };
    float want[] = { -0.565102f };
    float x[] = { 0.611f };
    float y[] = { -0.082f };
    float alpha = 1.0f;
    int layout = 101;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_sger(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], flteps, "sger(case 1390)");
  }

  {
    float a[] = { -0.515f };
    float want[] = { -0.565102f };
    float x[] = { 0.611f };
    float y[] = { -0.082f };
    float alpha = 1.0f;
    int layout = 102;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_sger(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], flteps, "sger(case 1391)");
  }

  /* ---- real double precision: dger ---- */
  {
    double a[] = { -0.809 };
    double want[] = { -1.273224 };
    double x[] = { -0.652 };
    double y[] = { 0.712 };
    double alpha = 1;
    int layout = 101;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_dger(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], dbleps, "dger(case 1392)");
  }

  {
    double a[] = { -0.809 };
    double want[] = { -1.273224 };
    double x[] = { -0.652 };
    double y[] = { 0.712 };
    double alpha = 1;
    int layout = 102;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_dger(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], dbleps, "dger(case 1393)");
  }

  /* ---- complex single precision: cgeru / cgerc ---- */
  {
    float a[] = { -0.651f, 0.856f };
    float want[] = { -0.651f, 0.856f };
    float x[] = { -0.38f, -0.235f };
    float y[] = { -0.627f, 0.757f };
    float alpha[2] = {0.0f, 0.0f};
    int layout = 101;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_cgeru(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], flteps, "cgeru(case 1394) real");
    gsl_test_rel(a[1], want[1], flteps, "cgeru(case 1394) imag");
  }

  {
    float a[] = { -0.651f, 0.856f };
    float want[] = { -0.651f, 0.856f };
    float x[] = { -0.38f, -0.235f };
    float y[] = { -0.627f, 0.757f };
    float alpha[2] = {0.0f, 0.0f};
    int layout = 101;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_cgerc(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], flteps, "cgerc(case 1395) real");
    gsl_test_rel(a[1], want[1], flteps, "cgerc(case 1395) imag");
  }

  {
    float a[] = { -0.651f, 0.856f };
    float want[] = { -0.651f, 0.856f };
    float x[] = { -0.38f, -0.235f };
    float y[] = { -0.627f, 0.757f };
    float alpha[2] = {0.0f, 0.0f};
    int layout = 102;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_cgeru(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], flteps, "cgeru(case 1396) real");
    gsl_test_rel(a[1], want[1], flteps, "cgeru(case 1396) imag");
  }

  {
    float a[] = { -0.651f, 0.856f };
    float want[] = { -0.651f, 0.856f };
    float x[] = { -0.38f, -0.235f };
    float y[] = { -0.627f, 0.757f };
    float alpha[2] = {0.0f, 0.0f};
    int layout = 102;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_cgerc(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], flteps, "cgerc(case 1397) real");
    gsl_test_rel(a[1], want[1], flteps, "cgerc(case 1397) imag");
  }

  /* ---- complex double precision: zgeru / zgerc ---- */
  {
    double a[] = { -0.426, 0.757 };
    double want[] = { 0.049724, 0.90607 };
    double x[] = { -0.579, -0.155 };
    double y[] = { 0.831, 0.035 };
    double alpha[2] = {-1, 0};
    int layout = 101;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_zgeru(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], dbleps, "zgeru(case 1398) real");
    gsl_test_rel(a[1], want[1], dbleps, "zgeru(case 1398) imag");
  }

  {
    double a[] = { -0.426, 0.757 };
    double want[] = { 0.060574, 0.86554 };
    double x[] = { -0.579, -0.155 };
    double y[] = { 0.831, 0.035 };
    double alpha[2] = {-1, 0};
    int layout = 101;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_zgerc(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], dbleps, "zgerc(case 1399) real");
    gsl_test_rel(a[1], want[1], dbleps, "zgerc(case 1399) imag");
  }

  {
    double a[] = { -0.426, 0.757 };
    double want[] = { 0.049724, 0.90607 };
    double x[] = { -0.579, -0.155 };
    double y[] = { 0.831, 0.035 };
    double alpha[2] = {-1, 0};
    int layout = 102;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_zgeru(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], dbleps, "zgeru(case 1400) real");
    gsl_test_rel(a[1], want[1], dbleps, "zgeru(case 1400) imag");
  }

  {
    double a[] = { -0.426, 0.757 };
    double want[] = { 0.060574, 0.86554 };
    double x[] = { -0.579, -0.155 };
    double y[] = { 0.831, 0.035 };
    double alpha[2] = {-1, 0};
    int layout = 102;
    int rows = 1;
    int cols = 1;
    int ld = 1;
    int inc_x = -1;
    int inc_y = -1;

    cblas_zgerc(layout, rows, cols, alpha, x, inc_x, y, inc_y, a, ld);
    gsl_test_rel(a[0], want[0], dbleps, "zgerc(case 1401) real");
    gsl_test_rel(a[1], want[1], dbleps, "zgerc(case 1401) imag");
  }
}