Exemplo n.º 1
0
lapack_int LAPACKE_dlarfg( lapack_int n, double* alpha, double* x,
                           lapack_int incx, double* tau )
{
#ifndef LAPACK_DISABLE_NAN_CHECK
    /* Optionally check input matrices for NaNs */
    if( LAPACKE_d_nancheck( 1, alpha, 1 ) ) {
        return -2;
    }
    if( LAPACKE_d_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) {
        return -3;
    }
#endif
    return LAPACKE_dlarfg_work( n, alpha, x, incx, tau );
}
Exemplo n.º 2
0
int 
CORE_dgbrce(int uplo, int N,
            PLASMA_desc *A,
            double *V,
            double *TAU,
            int st,
            int ed,
            int eltsize)
{
    int    NB, J1, J2, J3, KDM2, len, pt;
    int    len1, len2, t1ed, t2st;
    int    i;
    static double zzero = 0.0;
    PLASMA_desc vA=*A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    NB = A->mb;
    KDM2 = A->mb-2;
    if( uplo == PlasmaLower ){
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1  = ed+1;
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len = J3-J1+1;
            if(J3>J2)*A(J3,(i-1))=zzero;/* could be removed because A is supposed to be band.*/

            t1ed  = (J3/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J3-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,  i-1), ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)  );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st));
            len    = J3-J2; 
            if(len>0){
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)         = *A(J3,(i-1));
                *A(J3,(i-1))   = 0.0;
                LAPACKE_dlarfg_work( 2, A(J2,(i-1)), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1 ; i--){
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len    = J3-J2;
            if(len>0){
                pt    = J2;
                J1    = i;
                J2    = min(ed,N);
                t1ed  = (J2/NB)*NB;
                t2st  = max(t1ed+1,J1);
                len1  = t1ed-J1+1; 
                len2  = J2-t2st+1;
                if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(J3), (*TAU(J3)), A(pt, i   ), ELTLDD(vA, pt),  A((pt+1),  i  ), ELTLDD(vA, pt+1) );
                if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(J3), (*TAU(J3)), A(pt, t2st), ELTLDD(vA, pt),  A((pt+1), t2st), ELTLDD(vA, pt+1) );
            }
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1  = ed+1;
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len = J3-J1+1;
            if(J3>J2)*A((i-1), J3)=zzero;

            t1ed  = (J3/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J3-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, i-1),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1),  A(i, t2st), ELTLDD(vA, i) );
            /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it. */
            len    = J3-J2; 
            if(len>0){
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)         = *A(i-1, J3);
                *A(i-1, J3)  = 0.0;
                LAPACKE_dlarfg_work( 2, A(i-1, J2), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1 ; i--){
            /* find if there was a nnz created. if yes apply right else nothing to be done. */
            J2  = min((i+1+KDM2), N);
            J3  = min((J2+1), N);
            len    = J3-J2;
            if(len>0){
                pt    = J2;
                J1    = i;
                J2    = min(ed,N);
                t1ed  = (J2/NB)*NB;
                t2st  = max(t1ed+1,J1);
                len1  = t1ed-J1+1; 
                len2  = J2-t2st+1;
                if(len1>0)CORE_dlarfx2(PlasmaRight, len1 , (*V(J3)), (*TAU(J3)), A(i   , pt), ELTLDD(vA, i),     A(i,    pt+1), ELTLDD(vA, i) );
                if(len2>0)CORE_dlarfx2(PlasmaRight, len2 , (*V(J3)), (*TAU(J3)), A(t2st, pt), ELTLDD(vA, t2st),  A(t2st, pt+1), ELTLDD(vA, t2st) );
            }
        }
    } /* end of else for the upper case */

    return PLASMA_SUCCESS;
}
Exemplo n.º 3
0
int 
CORE_dgbelr(int uplo, int N,
            PLASMA_desc *A,
            double *V,
            double *TAU,
            int st,
            int ed,
            int eltsize)
{
    int    NB, J1, J2;
    int    len1, len2, t1ed, t2st;
    int    i;
    static double zzero = 0.0;
    PLASMA_desc vA=*A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    NB = A->mb;
    if( uplo == PlasmaLower ){
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* generate Householder to annihilate a(i+k-1,i) within the band*/
            *V(i)          = *A(i, (st-1));
            *A(i, (st-1))  = zzero;
            LAPACKE_dlarfg_work( 2, A((i-1),(st-1)), V(i), 1, TAU(i));

            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1    = st;
            J2    = i-2;
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, (i-1)),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)),  A(i, t2st), ELTLDD(vA, i) );
            CORE_dlarfx2ce(PlasmaLower, V(i), TAU(i), A(i-1,i-1), A(i,i-1), A(i,i)); 
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
        for (i = ed; i >= st+1 ; i--){
            J1    = i+1;
            J2    = min(ed,N);
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1),   ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1 ; i--){
            /* generate Householder to annihilate a(i+k-1,i) within the band*/
            *V(i)          = *A((st-1),  i);
            *A((st-1),  i) = zzero;
            LAPACKE_dlarfg_work( 2, A((st-1), (i-1)), V(i), 1, TAU(i));

            /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
            J1    = st;
            J2    = i-2;
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1),   ELTLDD(vA, J1)  , A(J1  , i), ELTLDD(vA, J1)   );
            if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
            CORE_dlarfx2ce(PlasmaUpper, V(i), TAU(i), A((i-1),(i-1)), A((i-1), i), A(i,i));
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1*/
        for (i = ed; i >= st+1 ; i--){
            J1    = i+1;
            J2    = min(ed,N);
            t1ed  = (J2/NB)*NB;
            t2st  = max(t1ed+1,J1);
            len1  = t1ed-J1+1; 
            len2  = J2-t2st+1;
            if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1  ), ELTLDD(vA, (i-1)),  A(i,  J1 ), ELTLDD(vA, i) );
            if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)),  A(i, t2st), ELTLDD(vA, i) );
        }
    }  /* end of else for the upper case*/

    return PLASMA_SUCCESS;

}
Exemplo n.º 4
0
/***************************************************************************//**
 * @ingroup CORE_double
 *******************************************************************************
 *
 *  Purpose
 *  =======
 *
 *  CORE_dlarfx2c applies a complex elementary reflector H to a diagonal corner 
 *  C=[C1, C2, C3], from both the left and the right side. C = H * C * H.
 *  It is used in the case of general matrices, where it create a nnz at the 
 *  NEW_NNZ position, then it eliminate it and update the reflector V and TAU.
 *  If PlasmaLower, a left apply is followed by a right apply.
 *  If PlasmaUpper, a right apply is followed by a left apply.
 *  H is represented in the form
 *   
 *  This routine is a special code for a corner C diagonal block  C1  NEW_NNZ
 *                                                                C2  C3
 *   
 *
 *        H = I - tau * v * v'
 *
 *  where tau is a complex scalar and v is a complex vector.
 *
 *  If tau = 0, then H is taken to be the unit matrix
 *
 *  This version uses inline code if H has order < 11.
 *
 *  Arguments
 *  =========
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in, out] V
 *          On entry, the double complex V in the representation of H.
 *          On exit, the double complex V in the representation of H, 
 *          updated by the elimination of the NEW_NNZ created by the 
 *          left apply in case of PlasmaLower or the right apply in 
 *          case of PlasmaUpper.
 *
 * @param[in] TAU
 *          On entry, the value tau in the representation of H.
 *          On exit, the value tau in the representation of H, 
 *          updated by the elimination of the NEW_NNZ created by the 
 *          left apply in case of PlasmaLower or the right apply in 
 *          case of PlasmaUpper.
 *
 * @param[in,out] C1
 *          On entry, the element C1.
 *          On exit, C1 is overwritten by the result H * C * H.
 *
 * @param[in,out] C2
 *          On entry, the element C2.
 *          On exit, C2 is overwritten by the result H * C * H.
  *
 * @param[in,out] C3
 *          On entry, the element C3.
 *          On exit, C3 is overwritten by the result H * C * H.
 *
 *  =====================================================================
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 ******************************************************************************/
int 
CORE_dlarfx2ce(PLASMA_enum uplo,
               double *V,
               double *TAU,
               double *C1,
               double *C2,
               double *C3)
{
    double T2, SUM, TEMP, VIN, TAUIN;

    /* Quick return */
    if (*TAU == (double)0.0)
        return PLASMA_SUCCESS;

    /*
     *        Special code for a diagonal block  C1
     *                                           C2  C3
     */
    if(uplo==PlasmaLower){    
        /*
         *  Do the corner for the lower case BIDIAG ==> Left then will
         *  create a new nnz. eliminate it and modify V TAU and then
         *  Right L and R for the 2x2 corner
         *             C(N-1, N-1)  C(N-1,N)        C1  TEMP 
         *             C(N  , N-1)  C(N  ,N)        C2  C3 
         */
        VIN   = *V;
        TAUIN = (*TAU);
        /*  Left 1 ==> C1 */
        /*             C2 */
        VIN  = (VIN);
        T2   = TAUIN * (VIN);
        SUM  = *C1   + VIN*(*C2);
        *C1  = *C1   - SUM*TAUIN;
        *C2  = *C2   - SUM*T2;
        /*   new nnz at TEMP and update C3 */
        SUM  =        VIN * (*C3);
        TEMP =      - SUM * TAUIN;
        *C3  = *C3  - SUM * T2;
        /*  generate Householder to annihilate the nonzero created at TEMP */
        *V    = TEMP;
        LAPACKE_dlarfg_work( 2, C1, V, 1, TAU);
        VIN   = (*V);
        TAUIN = (*TAU);
        /*  Right 1 ==> C2 C3 */
        /* VIN     = VIN */
        T2  = TAUIN * (VIN);
        SUM = *C2   + VIN*(*C3);
        *C2 = *C2   - SUM*TAUIN;
        *C3 = *C3   - SUM*T2;
    }else if(uplo==PlasmaUpper){ 
        /*  
         * Do the corner for the upper case BIDIAG ==> Right then will
        *  create a new nnz. eliminate it and modify V TAU and then
        *  Left
        *             C(N-1, N-1)  C(N-1,N)        C1    C2 
        *             C(N  , N-1)  C(N  ,N)        TEMP  C3 
        *  For Left : use (TAU) and V. 
        *  For Right: use (TAU) and (V) as input. 
        */
        VIN   = (*V);
        TAUIN = (*TAU);
        /*  Right 1 ==> C1 C2 */
        /* VIN     = VIN */
        T2   = TAUIN*(VIN);
        SUM  = *C1  + VIN*(*C2);
        *C1  = *C1  - SUM*TAUIN;
        *C2  = *C2  - SUM*T2;
        /*   new nnz at TEMP and update C3 */
        SUM  =        VIN * (*C3);
        TEMP =      - SUM * TAUIN;
        *C3  = *C3  - SUM * T2;
        /*  generate Householder to annihilate the nonzero created at TEMP */
        *V    = TEMP;
        LAPACKE_dlarfg_work( 2, C1, V, 1, TAU);
        VIN   = *V;
        TAUIN = (*TAU);
        /*  apply from the Left using the NEW V TAU to the remaining 2 elements [C2 C3] */
        /*  Left 2 ==> C2 */
        /*             C3 */
        VIN = (VIN);
        T2  = TAUIN*(VIN);
        SUM = *C2 + VIN*(*C3);
        *C2 = *C2 - SUM*TAUIN;
        *C3 = *C3 - SUM*T2;
    }
    return PLASMA_SUCCESS;
}