void F77_ztrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { int i,j,LDA,LDB; CBLAS_TEST_ZOMPLEX *A, *B; enum CBLAS_SIDE side; enum CBLAS_DIAG diag; enum CBLAS_UPLO uplo; enum CBLAS_TRANSPOSE trans; get_uplo_type(uplow,&uplo); get_transpose_type(transp,&trans); get_diag_type(diagn,&diag); get_side_type(rtlf,&side); if (*order == TEST_ROW_MJR) { if (side == CblasLeft) { LDA = *m+1; A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX)); for( i=0; i<*m; i++ ) for( j=0; j<*m; j++ ) { A[i*LDA+j].real=a[j*(*lda)+i].real; A[i*LDA+j].imag=a[j*(*lda)+i].imag; } } else{ LDA = *n+1; A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX)); for( i=0; i<*n; i++ ) for( j=0; j<*n; j++ ) { A[i*LDA+j].real=a[j*(*lda)+i].real; A[i*LDA+j].imag=a[j*(*lda)+i].imag; } } LDB = *n+1; B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX)); for( i=0; i<*m; i++ ) for( j=0; j<*n; j++ ) { B[i*LDB+j].real=b[j*(*ldb)+i].real; B[i*LDB+j].imag=b[j*(*ldb)+i].imag; } cblas_ztrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha, A, LDA, B, LDB ); for( j=0; j<*n; j++ ) for( i=0; i<*m; i++ ) { b[j*(*ldb)+i].real=B[i*LDB+j].real; b[j*(*ldb)+i].imag=B[i*LDB+j].imag; } free(A); free(B); } else if (*order == TEST_COL_MJR) cblas_ztrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha, a, *lda, b, *ldb); else cblas_ztrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha, a, *lda, b, *ldb); }
/*------------------------------------------------------------------------ * Check the factorization of the matrix A2 */ static int check_factorization(int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, int uplo, double eps) { double Anorm, Rnorm; PLASMA_Complex64_t alpha; int info_factorization; int i,j; PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *L1 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *L2 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); double *work = (double *)malloc(N*sizeof(double)); memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex64_t)); memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex64_t)); alpha= 1.0; LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ if (uplo == PlasmaUpper){ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N); cblas_ztrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } else{ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N); cblas_ztrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } /* Compute the Residual || A -L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; BLAS_zge_norm( blas_colmajor, blas_inf_norm, N, N, Residual, N, &Rnorm ); BLAS_zge_norm( blas_colmajor, blas_inf_norm, N, N, A1, LDA, &Anorm ); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); if ( isnan(Rnorm/(Anorm*N*eps)) || isinf(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 60.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
/*------------------------------------------------------------------------ * Check the factorization of the matrix A2 */ static int check_factorization(int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, int uplo, double eps) { PLASMA_Complex64_t alpha = 1.0; double Anorm, Rnorm, result; int info_factorization; int i,j; PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *L1 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *L2 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); double *work = (double *)malloc(N*sizeof(double)); memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex64_t)); memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex64_t)); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N); if (uplo == PlasmaUpper) cblas_ztrmm(CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); else cblas_ztrmm(CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); /* Compute the Residual || A - L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / ( Anorm * N * eps ); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result); if ( isnan(result) || isinf(result) || (result > 60.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
inline void trmm( CBLAS_ORDER const order, CBLAS_SIDE const side, CBLAS_UPLO const uplo, CBLAS_TRANSPOSE const transA, CBLAS_DIAG const unit, int const M, int const N, std::complex<double> const *A, int const lda, std::complex<double>* B, int const incB ) { std::complex<double> alpha = 1.0; cblas_ztrmm(order, side, uplo, transA, unit, M, N, reinterpret_cast<cblas_double_complex_type const *>(&alpha), reinterpret_cast<cblas_double_complex_type const *>(A), lda, reinterpret_cast<cblas_double_complex_type *>(B), incB ); }
static int check_transformation(int itype, int uplo, int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, PLASMA_Complex64_t *B2, int LDB, double eps) { PLASMA_Complex64_t alpha = 1.0; double Anorm, Rnorm, result; int info_transformation; int i, j; char *str; PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Aorig = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); double *work = (double *)malloc(N*sizeof(double)); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'a', N, N, A1, LDA, Residual, N); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig, N); /* Rebuild the symmetry of A2 */ if (uplo == PlasmaLower) { for (j = 0; j < N; j++) for (i = j+1; i < N; i++) Aorig[j+i*N] = conj(Aorig[i+j*N]); } else { for (i = 0; i < N; i++) for (j = i+1; j < N; j++) Aorig[j+i*N] = conj(Aorig[i+j*N]); } if (itype == 1) { if (uplo == PlasmaLower) { str = "L*A2*L'"; cblas_ztrmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } else{ str = "U'*A2*U"; cblas_ztrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } } else { if (uplo == PlasmaLower) { str = "inv(L')*A2*inv(L)"; cblas_ztrsm(CblasColMajor, CblasLeft, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } else{ str = "inv(U)*A2*inv(U')"; cblas_ztrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrsm(CblasColMajor, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } } /* Compute the Residual ( A1 - W ) */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / (Anorm * N *eps); printf("============\n"); printf("Checking the global transformation \n"); printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result ); if (isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Transformation is suspicious ! \n"); info_transformation = 1; } else { printf("-- Transformation is CORRECT ! \n"); info_transformation = 0; } free(Residual); free(Aorig); free(work); return info_transformation; }
int CORE_zttrfb(int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, PLASMA_Complex64_t *A1, int LDA1, PLASMA_Complex64_t *A2, int LDA2, PLASMA_Complex64_t *V, int LDV, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *WORK, int LDWORK) { static PLASMA_Complex64_t zone = 1.0; static PLASMA_Complex64_t mzone = -1.0; int j, vi; /* Check input arguments */ if (M1 < 0) { coreblas_error(5, "Illegal value of M1"); return -5; } if (N1 < 0) { coreblas_error(6, "Illegal value of N1"); return -6; } if ((M2 < 0) || ( (side == PlasmaRight) && (M1 != M2) ) ) { coreblas_error(7, "Illegal value of M2"); return -7; } if ((N2 < 0) || ( (side == PlasmaLeft) && (N1 != N2) ) ) { coreblas_error(8, "Illegal value of N2"); return -8; } if (K < 0) { coreblas_error(9, "Illegal value of K"); return -9; } /* Quick return */ if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0)) return PLASMA_SUCCESS; if (storev == PlasmaColumnwise) { if (direct == PlasmaForward) { /* * Let V = ( V1 ) (first K rows) * ( V2 ) * where V2 is non-unit upper triangular */ if (side == PlasmaLeft) { /* * Colwise / Forward / Left * ------------------------- * * Form H * A or H' * A where A = ( A1 ) * ( A2 ) * where A2 = ( A2_1 ) * ( A2_2 ) */ /* * W = A1 + V' * A2 */ /* * W = A2_2 */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), K, N2, &A2[M2-K], LDA2, WORK, LDWORK); /* * W = V2' * A2_2 */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, K, N2, CBLAS_SADDR(zone), &V[M2-K], LDV, WORK, LDWORK); if (M2 > K) { /* * W = W + V1' * A2_1 */ cblas_zgemm( CblasColMajor, CblasConjTrans, CblasNoTrans, K, N2, M2-K, CBLAS_SADDR(zone), V, LDV, A2, LDA2, CBLAS_SADDR(zone), WORK, LDWORK); } /* * W = A1 + W */ for(j = 0; j < N1; j++) { cblas_zaxpy(K, CBLAS_SADDR(zone), &A1[LDA1*j], 1, &WORK[LDWORK*j], 1); } /* * A2 = A2 - V * T * W -> W = T * W, A2 = A2 - V * W */ /* * W = T * W */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2, CBLAS_SADDR(zone), T, LDT, WORK, LDWORK); /* * A1 = A1 - W */ for(j = 0; j < N1; j++) { cblas_zaxpy(K, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } /* * A2_1 = A2_1 - V1 * W */ if (M2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2-K, N2, K, CBLAS_SADDR(mzone), V, LDV, WORK, LDWORK, CBLAS_SADDR(zone), A2, LDA2); } /* * W = - V2 * W */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, K, N2, CBLAS_SADDR(mzone), &V[M2-K], LDV, WORK, LDWORK); /* * A2_2 = A2_2 + W */ for(j = 0; j < N2; j++) { cblas_zaxpy( K, CBLAS_SADDR(zone), &WORK[LDWORK*j], 1, &A2[LDA2*j+(M2-K)], 1); } } else { /* * Colwise / Forward / Right * ------------------------- * * Form H * A or H' * A where: * * A = ( A1 A2 ) * * A2 = ( A2_1 : A2_2 ) * * A2_1 is M2 x (M2-K) * A2_2 is M2 x K * * V = ( V_1 ) * ( V_2 ) * * V_1 is full and (N2-K) x K * V_2 is upper triangular and K x K */ /* * W = ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) * * W is M x K * A1 is M x K * A2 is M x N2 split as (A2_1 A2_2) such as * A2_1 is (N2-K) x K * A2_2 is M x K */ /* W = A2_2 */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M2, K, &A2[LDA2*(N2-K)], LDA2, WORK, LDWORK); /* W = W * V_2 --> W = A2_2 * V_2 */ cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, M2, K, CBLAS_SADDR(zone), &V[N2-K], LDV, WORK, LDWORK); /* W = W + A2_1 * V_1 */ if (N2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, K, N2-K, CBLAS_SADDR(zone), A2, LDA2, V, LDV, CBLAS_SADDR(zone), WORK, LDWORK); } /* W = A1 + W */ for (j = 0; j < K; j++) { cblas_zaxpy(M1, CBLAS_SADDR(zone), &A1[LDA1*j], 1, &WORK[LDWORK*j], 1); } /* W = W * T --> ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) */ cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K, CBLAS_SADDR(zone), T, LDT, WORK, LDWORK); /* * A1 = A1 - W */ for(j = 0; j < K; j++) { cblas_zaxpy(M1, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } /* * A2 = A2 - W * V' --> A2 - W*V_1' - W*V_2' */ /* A2 = A2 - W * V_1' */ if (N2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans, M2, N2-K, K, CBLAS_SADDR(mzone), WORK, LDWORK, V, LDV, CBLAS_SADDR(zone), A2, LDA2); } /* A2 = A2 - W * V_2' */ cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit, M2, K, CBLAS_SADDR(mzone), &V[N2-K], LDV, WORK, LDWORK); for(j = 0; j < K; j++) { cblas_zaxpy( M2, CBLAS_SADDR(zone), &WORK[LDWORK*j], 1, &A2[LDA2*(j+N2-K)], 1); } } } else { coreblas_error(3, "Not implemented (ColWise / Backward / Left or Right)"); return PLASMA_ERR_NOT_SUPPORTED; } } else { /* * Rowwise */ if (direct == PlasmaForward) { /* * Let V = ( V1 V2 ) (V1: first K cols) * * where V2 is non-unit lower triangular */ if (side == PlasmaLeft) { /* * Rowwise / Forward / Left * ------------------------- * * Form H * A or H' * A where A = ( A1 ) * ( A2 ) * where A2 = ( A2_1 ) * ( A2_2 ) */ /* V_2 first element */ vi = LDV*(M2-K); /* * W = A1 + V * A2 */ /* * W = A2_2 */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), K, N2, &A2[M2-K], LDA2, WORK, LDWORK); /* * W = V2 * A2_2 */ //**DB CblasColMajor, CblasLeft, CblasUpper, cblas_ztrmm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, K, N2, CBLAS_SADDR(zone), &V[vi], LDV, WORK, LDWORK); if (M2 > K) { /* * W = W + V1 * A2_1 */ cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, K, N2, M2-K, CBLAS_SADDR(zone), V, LDV, A2, LDA2, CBLAS_SADDR(zone), WORK, LDWORK); } /* * W = A1 + W */ for(j = 0; j < N1; j++) { cblas_zaxpy( K, CBLAS_SADDR(zone), &A1[LDA1*j], 1, &WORK[LDWORK*j], 1); } /* * W = T * W */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2, CBLAS_SADDR(zone), T, LDT, WORK, LDWORK); /* * A1 = A1 - W */ for(j = 0; j < N1; j++) { cblas_zaxpy( K, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } /* * A2 = A2 - V' * T * W -> A2 = A2 - V' * W */ /* * A2_1 = A2_1 - V1' * W */ if (M2 > K) { cblas_zgemm( CblasColMajor, CblasConjTrans, CblasNoTrans, M2-K, N2, K, CBLAS_SADDR(mzone), V, LDV, WORK, LDWORK, CBLAS_SADDR(zone), A2, LDA2); } /* * W = - V2' * W */ cblas_ztrmm( CblasColMajor, CblasLeft, CblasLower, CblasConjTrans, CblasNonUnit, K, N2, CBLAS_SADDR(mzone), &V[vi], LDV, WORK, LDWORK); /* * A2_2 = A2_2 + W */ for(j = 0; j < N2; j++) { cblas_zaxpy( K, CBLAS_SADDR(zone), &WORK[LDWORK*j], 1, &A2[LDA2*j+(M2-K)], 1); } } else { /* * Rowwise / Forward / Right * ------------------------- * * Form H * A or H' * A where: * * A = ( A1 A2 ) * * A2 = ( A2_1 : A2_2 ) * * A2_1 is M2 x (M2-K) * A2_2 is M2 x K * * V = ( V_1 ) * ( V_2 ) * * V_1 is full and (N2-K) x K * V_2 is lower triangular and K x K */ /* * W = ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) * * W is M x K * A1 is M x K * A2 is M x N2 split as (A2_1 A2_2) such as * A2_1 is (N2-K) x K * A2_2 is M x K */ /* V_2 and A2_2 first element */ vi = LDV*(N2-K); /* W = A2_2 */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M2, K, &A2[LDA2*(N2-K)], LDA2, WORK, LDWORK); /* W = W * V_2' --> W = A2_2 * V_2' */ cblas_ztrmm( CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, M2, K, CBLAS_SADDR(zone), &V[vi], LDV, WORK, LDWORK); /* W = W + A2_1 * V_1' */ if (N2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans, M2, K, N2-K, CBLAS_SADDR(zone), A2, LDA2, V, LDV, CBLAS_SADDR(zone), WORK, LDWORK); } /* W = A1 + W */ for (j = 0; j < K; j++) { cblas_zaxpy(M1, CBLAS_SADDR(zone), &A1[LDA1*j], 1, &WORK[LDWORK*j], 1); } /* W = W * op(T) --> ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) */ cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K, CBLAS_SADDR(zone), T, LDT, WORK, LDWORK); /* * A1 = A1 - W */ for(j = 0; j < K; j++) { cblas_zaxpy(M1, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } /* * A2 = A2 - W * V --> A2 - W*V_1 - W*V_2 */ /* A2 = A2 - W * V_1 */ if (N2 > K) { cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, N2-K, K, CBLAS_SADDR(mzone), WORK, LDWORK, V, LDV, CBLAS_SADDR(zone), A2, LDA2); } /* A2 = A2 - W * V_2 */ cblas_ztrmm( CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, M2, K, CBLAS_SADDR(mzone), &V[vi], LDV, WORK, LDWORK); for(j = 0; j < K; j++) { cblas_zaxpy( M2, CBLAS_SADDR(zone), &WORK[LDWORK*j], 1, &A2[LDA2*(N2-K+j)], 1); } } } else { coreblas_error(3, "Not implemented (Rowwise / Backward / Left or Right)"); return PLASMA_ERR_NOT_SUPPORTED; } } return PLASMA_SUCCESS; }