Exemplo n.º 1
0
void F77_ztrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
       int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, 
       int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) {
  int i,j,LDA,LDB;
  CBLAS_TEST_ZOMPLEX *A, *B;
  enum CBLAS_SIDE side;
  enum CBLAS_DIAG diag;
  enum CBLAS_UPLO uplo;
  enum CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow,&uplo);
  get_transpose_type(transp,&trans);
  get_diag_type(diagn,&diag);
  get_side_type(rtlf,&side);

  if (*order == TEST_ROW_MJR) {
     if (side == CblasLeft) {
        LDA = *m+1;
        A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
        for( i=0; i<*m; i++ )
           for( j=0; j<*m; j++ ) {
              A[i*LDA+j].real=a[j*(*lda)+i].real;
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
           }
     }
     else{
        LDA = *n+1;
        A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
        for( i=0; i<*n; i++ )
           for( j=0; j<*n; j++ ) {
              A[i*LDA+j].real=a[j*(*lda)+i].real;
              A[i*LDA+j].imag=a[j*(*lda)+i].imag;
           }
     }
     LDB = *n+1;
     B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX));
     for( i=0; i<*m; i++ )
        for( j=0; j<*n; j++ ) {
           B[i*LDB+j].real=b[j*(*ldb)+i].real;
           B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
        }
     cblas_ztrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha, 
		 A, LDA, B, LDB );
     for( j=0; j<*n; j++ )
        for( i=0; i<*m; i++ ) {
           b[j*(*ldb)+i].real=B[i*LDB+j].real;
           b[j*(*ldb)+i].imag=B[i*LDB+j].imag;
        }
     free(A);
     free(B);
  }
  else if (*order == TEST_COL_MJR)
     cblas_ztrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha, 
		   a, *lda, b, *ldb);
  else
     cblas_ztrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha, 
		   a, *lda, b, *ldb);
}
Exemplo n.º 2
0
/*------------------------------------------------------------------------
 *  Check the factorization of the matrix A2
 */
static int check_factorization(int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, int uplo, double eps)
{
    double Anorm, Rnorm;
    PLASMA_Complex64_t alpha;
    int info_factorization;
    int i,j;

    PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *L1       = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *L2       = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
    double *work              = (double *)malloc(N*sizeof(double));

    memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex64_t));
    memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex64_t));

    alpha= 1.0;

    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);

    /* Dealing with L'L or U'U  */
    if (uplo == PlasmaUpper){
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
        cblas_ztrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N);
    }
    else{
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
        cblas_ztrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N);
    }

    /* Compute the Residual || A -L'L|| */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
           Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];

    BLAS_zge_norm( blas_colmajor, blas_inf_norm, N, N, Residual, N, &Rnorm );
    BLAS_zge_norm( blas_colmajor, blas_inf_norm, N, N, A1, LDA, &Anorm );

    printf("============\n");
    printf("Checking the Cholesky Factorization \n");
    printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));

    if ( isnan(Rnorm/(Anorm*N*eps)) || isinf(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 60.0) ){
        printf("-- Factorization is suspicious ! \n");
        info_factorization = 1;
    }
    else{
        printf("-- Factorization is CORRECT ! \n");
        info_factorization = 0;
    }

    free(Residual); free(L1); free(L2); free(work);

    return info_factorization;
}
Exemplo n.º 3
0
/*------------------------------------------------------------------------
 *  Check the factorization of the matrix A2
 */
static int check_factorization(int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, int uplo, double eps)
{
    PLASMA_Complex64_t alpha = 1.0;
    double Anorm, Rnorm, result;
    int info_factorization;
    int i,j;

    PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *L1       = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *L2       = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
    double *work                 = (double *)malloc(N*sizeof(double));

    memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex64_t));
    memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex64_t));

    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);

    /* Dealing with L'L or U'U  */
    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N);
    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N);
    if (uplo == PlasmaUpper)
       cblas_ztrmm(CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N);
    else
       cblas_ztrmm(CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N);

    /* Compute the Residual || A - L'L|| */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
           Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];

    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N,   work);
    Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1,       LDA, work);

    result = Rnorm / ( Anorm * N * eps );
    printf("============\n");
    printf("Checking the Cholesky Factorization \n");
    printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result);

    if ( isnan(result) || isinf(result) || (result > 60.0) ){
        printf("-- Factorization is suspicious ! \n");
        info_factorization = 1;
    }
    else{
        printf("-- Factorization is CORRECT ! \n");
        info_factorization = 0;
    }

    free(Residual); free(L1); free(L2); free(work);

    return info_factorization;
}
Exemplo n.º 4
0
inline void trmm(
	CBLAS_ORDER const order,
	CBLAS_SIDE const side,
	CBLAS_UPLO const uplo,
	CBLAS_TRANSPOSE const transA,
	CBLAS_DIAG const unit,
	int const M,
	int const N,
	std::complex<double> const *A, int const lda,
        std::complex<double>* B, int const incB
) {
	std::complex<double> alpha = 1.0;
	cblas_ztrmm(order, side, uplo, transA, unit, M, N, 
		reinterpret_cast<cblas_double_complex_type const *>(&alpha),
		reinterpret_cast<cblas_double_complex_type const *>(A), lda,
	        reinterpret_cast<cblas_double_complex_type *>(B), incB
	);
}
Exemplo n.º 5
0
static int check_transformation(int itype, int uplo, int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, PLASMA_Complex64_t *B2, int LDB, double eps)
{
    PLASMA_Complex64_t alpha = 1.0;
    double Anorm, Rnorm, result;
    int info_transformation;
    int i, j;
    char *str;

    PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *Aorig    = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
    double *work                 = (double *)malloc(N*sizeof(double));

    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'a',                N, N, A1, LDA, Residual, N);
    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig,    N);
    
    /* Rebuild the symmetry of A2 */
    if (uplo == PlasmaLower) {
        for (j = 0; j < N; j++)
            for (i = j+1; i < N; i++)
                Aorig[j+i*N] = conj(Aorig[i+j*N]); 
    } else {
        for (i = 0; i < N; i++)
            for (j = i+1; j < N; j++)
                Aorig[j+i*N] = conj(Aorig[i+j*N]); 
    }

    if (itype == 1) {
        if (uplo == PlasmaLower) {
            str = "L*A2*L'";
            cblas_ztrmm(CblasColMajor, CblasLeft,  CblasLower, CblasNoTrans,   CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);   
            cblas_ztrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);   
        }
        else{
            str = "U'*A2*U";
            cblas_ztrmm(CblasColMajor, CblasLeft,  CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);   
            cblas_ztrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,   CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);   
        }
    }
    else {
        if (uplo == PlasmaLower) {
            str = "inv(L')*A2*inv(L)";
            cblas_ztrsm(CblasColMajor, CblasLeft,  CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);   
            cblas_ztrsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,   CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);   
        }
        else{
            str = "inv(U)*A2*inv(U')";
            cblas_ztrsm(CblasColMajor, CblasLeft,  CblasUpper, CblasNoTrans,   CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);   
            cblas_ztrsm(CblasColMajor, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);   
            
        }
    }
    
    /* Compute the Residual ( A1 - W ) */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
            Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i];
    
    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N,   work);
    Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1,       LDA, work);
    
    result = Rnorm / (Anorm * N *eps);
    printf("============\n");
    printf("Checking the global transformation \n");
    printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result );

    if (isnan(result) || isinf(result) || (result > 60.0) ) {
        printf("-- Transformation is suspicious ! \n");
        info_transformation = 1;
    }
    else {
        printf("-- Transformation is CORRECT ! \n");
        info_transformation = 0;
    }

    free(Residual); free(Aorig); free(work);

    return info_transformation;
}
Exemplo n.º 6
0
int CORE_zttrfb(int side, int trans, int direct, int storev,
                int M1, int N1, int M2, int N2, int K,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *V, int LDV,
                PLASMA_Complex64_t *T, int LDT,
                PLASMA_Complex64_t *WORK, int LDWORK)
{
    static PLASMA_Complex64_t zone  =  1.0;
    static PLASMA_Complex64_t mzone = -1.0;

    int j, vi;

    /* Check input arguments */
    if (M1 < 0) {
        coreblas_error(5, "Illegal value of M1");
        return -5;
    }
    if (N1 < 0) {
        coreblas_error(6, "Illegal value of N1");
        return -6;
    }
    if ((M2 < 0) ||
        ( (side == PlasmaRight) && (M1 != M2) ) ) {
        coreblas_error(7, "Illegal value of M2");
        return -7;
    }
    if ((N2 < 0) ||
        ( (side == PlasmaLeft) && (N1 != N2) ) ) {
        coreblas_error(8, "Illegal value of N2");
        return -8;
    }
    if (K < 0) {
        coreblas_error(9, "Illegal value of K");
        return -9;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0))
        return PLASMA_SUCCESS;

    if (storev == PlasmaColumnwise) {

        if (direct == PlasmaForward) {
            /*
             * Let  V =  ( V1 )    (first K rows)
             *           ( V2 )
             * where V2 is non-unit upper triangular
             */
            if (side == PlasmaLeft) {

                /*
                 * Colwise / Forward / Left
                 * -------------------------
                 *
                 * Form  H * A  or  H' * A  where  A = ( A1 )
                 *                                     ( A2 )
                 * where A2 = ( A2_1 )
                 *            ( A2_2 )
                 */

                /*
                 * W = A1 + V' * A2
                 */

                /*
                 * W = A2_2
                 */
                LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    K, N2,
                    &A2[M2-K], LDA2, WORK, LDWORK);

                /*
                 * W = V2' * A2_2
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    CblasConjTrans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(zone), &V[M2-K], LDV,
                    WORK, LDWORK);

                if (M2 > K) {
                    /*
                     * W = W + V1' * A2_1
                     */
                    cblas_zgemm(
                        CblasColMajor, CblasConjTrans, CblasNoTrans,
                        K, N2, M2-K,
                        CBLAS_SADDR(zone), V, LDV,
                        A2, LDA2,
                        CBLAS_SADDR(zone), WORK, LDWORK);
                }

                /*
                 * W = A1 + W
                 */
                for(j = 0; j < N1; j++) {
                    cblas_zaxpy(K, CBLAS_SADDR(zone),
                            &A1[LDA1*j], 1,
                            &WORK[LDWORK*j], 1);
                }

                /*
                 * A2 = A2 - V * T * W -> W = T * W, A2 = A2 - V * W
                 */

                /*
                 * W = T * W
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);
                /*
                 * A1 = A1 - W
                 */
                for(j = 0; j < N1; j++) {
                    cblas_zaxpy(K, CBLAS_SADDR(mzone),
                            &WORK[LDWORK*j], 1,
                            &A1[LDA1*j], 1);
                }

                /*
                 * A2_1 = A2_1 - V1 * W
                 */
                if (M2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasNoTrans,
                        M2-K, N2, K,
                        CBLAS_SADDR(mzone), V, LDV,
                        WORK, LDWORK,
                        CBLAS_SADDR(zone), A2, LDA2);
                }

                /*
                 * W = - V2 * W
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    CblasNoTrans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(mzone), &V[M2-K], LDV,
                    WORK, LDWORK);

                /*
                 * A2_2 = A2_2 + W
                 */
                for(j = 0; j < N2; j++) {
                    cblas_zaxpy(
                        K, CBLAS_SADDR(zone),
                        &WORK[LDWORK*j], 1,
                        &A2[LDA2*j+(M2-K)], 1);
                }
            }
            else {
                /* 
                 * Colwise / Forward / Right
                 * -------------------------
                 *
                 * Form  H * A  or  H' * A  where:
                 * 
                 *   A  = ( A1 A2 )
                 *
                 *   A2 = ( A2_1 : A2_2 )
                 *
                 *         A2_1 is M2 x (M2-K)
                 *         A2_2 is M2 x K
                 *
                 *   V = ( V_1 ) 
                 *       ( V_2 )
                 *
                 *         V_1 is full and (N2-K) x K
                 *         V_2 is upper triangular and K x K
                 */

                /*
                 * W = ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T)
                 *
                 *    W  is M x K
                 *    A1 is M x K
                 *    A2 is M x N2 split as (A2_1 A2_2) such as
                 *       A2_1 is (N2-K) x K
                 *       A2_2 is M x K
                 */

                /* W = A2_2 */
                LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M2, K,
                    &A2[LDA2*(N2-K)], LDA2, WORK, LDWORK);

                /* W = W * V_2 --> W = A2_2 * V_2 */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    CblasNoTrans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(zone), &V[N2-K], LDV,
                    WORK, LDWORK);

                /* W = W + A2_1 * V_1 */
                if (N2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasNoTrans,
                        M2, K, N2-K,
                        CBLAS_SADDR(zone), A2, LDA2,
                        V, LDV,
                        CBLAS_SADDR(zone), WORK, LDWORK);
                }

                /* W = A1 + W */
                for (j = 0; j < K; j++) {
                    cblas_zaxpy(M1, CBLAS_SADDR(zone), &A1[LDA1*j], 1,
                                &WORK[LDWORK*j], 1);
                }

                /* W = W * T --> ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);

                /*
                 * A1 = A1 - W
                 */

                for(j = 0; j < K; j++) {
                    cblas_zaxpy(M1, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1,
                                &A1[LDA1*j], 1);
                }

                /*
                 * A2 = A2 - W * V' --> A2 - W*V_1' - W*V_2'
                 */

                /* A2 = A2 - W * V_1' */
                if (N2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasConjTrans,
                        M2, N2-K, K,
                        CBLAS_SADDR(mzone), WORK, LDWORK,
                        V, LDV,
                        CBLAS_SADDR(zone), A2, LDA2);
                }

                /* A2 =  A2 -  W * V_2' */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    CblasConjTrans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(mzone), &V[N2-K], LDV,
                    WORK, LDWORK);

                for(j = 0; j < K; j++) {
                    cblas_zaxpy(
                        M2, CBLAS_SADDR(zone),
                        &WORK[LDWORK*j], 1,
                        &A2[LDA2*(j+N2-K)], 1);
                }
            }
        }
        else {
            coreblas_error(3, "Not implemented (ColWise / Backward / Left or Right)");
            return PLASMA_ERR_NOT_SUPPORTED;
        }
    }
    else {
        /*
         * Rowwise
         */
        if (direct == PlasmaForward) {
            /*
             * Let  V =  ( V1 V2 )    (V1: first K cols)
             * 
             * where V2 is non-unit lower triangular
             */

            if (side == PlasmaLeft) {

                /*
                 * Rowwise / Forward / Left
                 * -------------------------
                 *
                 * Form  H * A  or  H' * A  where  A = ( A1 )
                 *                                     ( A2 )
                 * where A2 = ( A2_1 )
                 *            ( A2_2 )
                 */

                /* V_2 first element */
                vi = LDV*(M2-K);

                /*
                 * W = A1 + V * A2
                 */

                /*
                 * W = A2_2
                 */
                LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    K, N2,
                    &A2[M2-K], LDA2, WORK, LDWORK);

                /*
                 * W = V2 * A2_2
                 */
                //**DB CblasColMajor, CblasLeft, CblasUpper,
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasLower,
                    CblasNoTrans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(zone), &V[vi], LDV,
                    WORK, LDWORK);

                if (M2 > K) {
                    /*
                     * W = W + V1 * A2_1
                     */
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasNoTrans,
                        K, N2, M2-K,
                        CBLAS_SADDR(zone), V, LDV,
                        A2, LDA2,
                        CBLAS_SADDR(zone), WORK, LDWORK);
                }

                /*
                 * W = A1 + W
                 */
                for(j = 0; j < N1; j++) {
                    cblas_zaxpy(
                            K, CBLAS_SADDR(zone),
                            &A1[LDA1*j], 1,
                            &WORK[LDWORK*j], 1);
                }

                /*
                 * W = T * W
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);

                /*
                 * A1 = A1 - W
                 */
                for(j = 0; j < N1; j++) {
                    cblas_zaxpy(
                            K, CBLAS_SADDR(mzone),
                            &WORK[LDWORK*j], 1,
                            &A1[LDA1*j], 1);
                }

                /*
                 * A2 = A2 - V' * T * W -> A2 = A2 - V' * W
                 */

                /*
                 * A2_1 = A2_1 - V1' * W
                 */
                if (M2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasConjTrans, CblasNoTrans,
                        M2-K, N2, K,
                        CBLAS_SADDR(mzone), V, LDV,
                        WORK, LDWORK,
                        CBLAS_SADDR(zone), A2, LDA2);
                }

                /*
                 * W = - V2' * W
                 */
                cblas_ztrmm(
                    CblasColMajor, CblasLeft, CblasLower,
                    CblasConjTrans, CblasNonUnit, K, N2,
                    CBLAS_SADDR(mzone), &V[vi], LDV,
                    WORK, LDWORK);

                /*
                 * A2_2 = A2_2 + W
                 */
                for(j = 0; j < N2; j++) {
                    cblas_zaxpy(
                        K, CBLAS_SADDR(zone),
                        &WORK[LDWORK*j], 1,
                        &A2[LDA2*j+(M2-K)], 1);
                }
            }
            else {
                /* 
                 * Rowwise / Forward / Right
                 * -------------------------
                 *
                 * Form  H * A  or  H' * A  where:
                 * 
                 *   A  = ( A1 A2 )
                 *
                 *   A2 = ( A2_1 : A2_2 )
                 *
                 *         A2_1 is M2 x (M2-K)
                 *         A2_2 is M2 x K
                 *
                 *   V = ( V_1 ) 
                 *       ( V_2 )
                 *
                 *         V_1 is full and (N2-K) x K
                 *         V_2 is lower triangular and K x K
                 */

                /*
                 * W = ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T)
                 *
                 *    W  is M x K
                 *    A1 is M x K
                 *    A2 is M x N2 split as (A2_1 A2_2) such as
                 *       A2_1 is (N2-K) x K
                 *       A2_2 is M x K
                 */

                /* V_2 and A2_2 first element */
                vi = LDV*(N2-K);

                /* W = A2_2 */
                LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M2, K,
                    &A2[LDA2*(N2-K)], LDA2, WORK, LDWORK);

                /* W = W * V_2' --> W = A2_2 * V_2' */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasLower,
                    CblasConjTrans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(zone), &V[vi], LDV,
                    WORK, LDWORK);

                /* W = W + A2_1 * V_1' */
                if (N2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasConjTrans,
                        M2, K, N2-K,
                        CBLAS_SADDR(zone), A2, LDA2,
                        V, LDV,
                        CBLAS_SADDR(zone), WORK, LDWORK);
                }

                /* W = A1 + W */
                for (j = 0; j < K; j++) {
                    cblas_zaxpy(M1, CBLAS_SADDR(zone), &A1[LDA1*j], 1,
                                &WORK[LDWORK*j], 1);
                }

                /* W = W * op(T) --> ( A1 + A2_1*V_1 + A2_2*V_2 ) * op(T) */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);

                /*
                 * A1 = A1 - W
                 */

                for(j = 0; j < K; j++) {
                    cblas_zaxpy(M1, CBLAS_SADDR(mzone), &WORK[LDWORK*j], 1,
                                &A1[LDA1*j], 1);
                }

                /*
                 * A2 = A2 - W * V --> A2 - W*V_1 - W*V_2
                 */

                /* A2 = A2 - W * V_1 */
                if (N2 > K) {
                    cblas_zgemm(
                        CblasColMajor, CblasNoTrans, CblasNoTrans,
                        M2, N2-K, K,
                        CBLAS_SADDR(mzone), WORK, LDWORK,
                        V, LDV,
                        CBLAS_SADDR(zone), A2, LDA2);
                }

                /* A2 =  A2 -  W * V_2 */
                cblas_ztrmm(
                    CblasColMajor, CblasRight, CblasLower,
                    CblasNoTrans, CblasNonUnit, M2, K,
                    CBLAS_SADDR(mzone), &V[vi], LDV,
                    WORK, LDWORK);

                for(j = 0; j < K; j++) {
                    cblas_zaxpy(
                        M2, CBLAS_SADDR(zone),
                        &WORK[LDWORK*j], 1,
                        &A2[LDA2*(N2-K+j)], 1);
                }

            }
 
        }
        else {
            coreblas_error(3, "Not implemented (Rowwise / Backward / Left or Right)");
            return PLASMA_ERR_NOT_SUPPORTED;
        }
    }
    return PLASMA_SUCCESS;
}