lapack_int LAPACKE_dlarfg( lapack_int n, double* alpha, double* x, lapack_int incx, double* tau ) { #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_d_nancheck( 1, alpha, 1 ) ) { return -2; } if( LAPACKE_d_nancheck( 1+(n-2)*ABS(incx), x, incx ) ) { return -3; } #endif return LAPACKE_dlarfg_work( n, alpha, x, incx, tau ); }
int CORE_dgbrce(int uplo, int N, PLASMA_desc *A, double *V, double *TAU, int st, int ed, int eltsize) { int NB, J1, J2, J3, KDM2, len, pt; int len1, len2, t1ed, t2st; int i; static double zzero = 0.0; PLASMA_desc vA=*A; /* Check input arguments */ if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (ed <= st) { coreblas_error(6, "Illegal value of st and ed (internal)"); return -6; } /* Quick return */ if (N == 0) return PLASMA_SUCCESS; NB = A->mb; KDM2 = A->mb-2; if( uplo == PlasmaLower ){ /* ======================== * LOWER CASE * ========================*/ for (i = ed; i >= st+1 ; i--){ /* apply Householder from the right. and create newnnz outside the band if J3 < N */ J1 = ed+1; J2 = min((i+1+KDM2), N); J3 = min((J2+1), N); len = J3-J1+1; if(J3>J2)*A(J3,(i-1))=zzero;/* could be removed because A is supposed to be band.*/ t1ed = (J3/NB)*NB; t2st = max(t1ed+1,J1); len1 = t1ed-J1+1; len2 = J3-t2st+1; if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1, i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) ); if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st)); len = J3-J2; if(len>0){ /* generate Householder to annihilate a(j+kd,j-1) within the band */ *V(J3) = *A(J3,(i-1)); *A(J3,(i-1)) = 0.0; LAPACKE_dlarfg_work( 2, A(J2,(i-1)), V(J3), 1, TAU(J3)); } } /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */ for (i = ed; i >= st+1 ; i--){ J2 = min((i+1+KDM2), N); J3 = min((J2+1), N); len = J3-J2; if(len>0){ pt = J2; J1 = i; J2 = min(ed,N); t1ed = (J2/NB)*NB; t2st = max(t1ed+1,J1); len1 = t1ed-J1+1; len2 = J2-t2st+1; if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(J3), (*TAU(J3)), A(pt, i ), ELTLDD(vA, pt), A((pt+1), i ), ELTLDD(vA, pt+1) ); if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(J3), (*TAU(J3)), A(pt, t2st), ELTLDD(vA, pt), A((pt+1), t2st), ELTLDD(vA, pt+1) ); } } } else { /* ======================== * UPPER CASE * ========================*/ for (i = ed; i >= st+1 ; i--){ /* apply Householder from the right. and create newnnz outside the band if J3 < N */ J1 = ed+1; J2 = min((i+1+KDM2), N); J3 = min((J2+1), N); len = J3-J1+1; if(J3>J2)*A((i-1), J3)=zzero; t1ed = (J3/NB)*NB; t2st = max(t1ed+1,J1); len1 = t1ed-J1+1; len2 = J3-t2st+1; if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, i-1), A(i, J1 ), ELTLDD(vA, i) ); if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1), A(i, t2st), ELTLDD(vA, i) ); /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it. */ len = J3-J2; if(len>0){ /* generate Householder to annihilate a(j+kd,j-1) within the band */ *V(J3) = *A(i-1, J3); *A(i-1, J3) = 0.0; LAPACKE_dlarfg_work( 2, A(i-1, J2), V(J3), 1, TAU(J3)); } } /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */ for (i = ed; i >= st+1 ; i--){ /* find if there was a nnz created. if yes apply right else nothing to be done. */ J2 = min((i+1+KDM2), N); J3 = min((J2+1), N); len = J3-J2; if(len>0){ pt = J2; J1 = i; J2 = min(ed,N); t1ed = (J2/NB)*NB; t2st = max(t1ed+1,J1); len1 = t1ed-J1+1; len2 = J2-t2st+1; if(len1>0)CORE_dlarfx2(PlasmaRight, len1 , (*V(J3)), (*TAU(J3)), A(i , pt), ELTLDD(vA, i), A(i, pt+1), ELTLDD(vA, i) ); if(len2>0)CORE_dlarfx2(PlasmaRight, len2 , (*V(J3)), (*TAU(J3)), A(t2st, pt), ELTLDD(vA, t2st), A(t2st, pt+1), ELTLDD(vA, t2st) ); } } } /* end of else for the upper case */ return PLASMA_SUCCESS; }
int CORE_dgbelr(int uplo, int N, PLASMA_desc *A, double *V, double *TAU, int st, int ed, int eltsize) { int NB, J1, J2; int len1, len2, t1ed, t2st; int i; static double zzero = 0.0; PLASMA_desc vA=*A; /* Check input arguments */ if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (ed <= st) { coreblas_error(6, "Illegal value of st and ed (internal)"); return -6; } /* Quick return */ if (N == 0) return PLASMA_SUCCESS; NB = A->mb; if( uplo == PlasmaLower ){ /* ======================== * LOWER CASE * ========================*/ for (i = ed; i >= st+1 ; i--){ /* generate Householder to annihilate a(i+k-1,i) within the band*/ *V(i) = *A(i, (st-1)); *A(i, (st-1)) = zzero; LAPACKE_dlarfg_work( 2, A((i-1),(st-1)), V(i), 1, TAU(i)); /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/ J1 = st; J2 = i-2; t1ed = (J2/NB)*NB; t2st = max(t1ed+1,J1); len1 = t1ed-J1+1; len2 = J2-t2st+1; if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, (i-1)), A(i, J1 ), ELTLDD(vA, i) ); if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)), A(i, t2st), ELTLDD(vA, i) ); CORE_dlarfx2ce(PlasmaLower, V(i), TAU(i), A(i-1,i-1), A(i,i-1), A(i,i)); } /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */ for (i = ed; i >= st+1 ; i--){ J1 = i+1; J2 = min(ed,N); t1ed = (J2/NB)*NB; t2st = max(t1ed+1,J1); len1 = t1ed-J1+1; len2 = J2-t2st+1; if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) ); if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) ); } } else { /* ======================== * UPPER CASE * ========================*/ for (i = ed; i >= st+1 ; i--){ /* generate Householder to annihilate a(i+k-1,i) within the band*/ *V(i) = *A((st-1), i); *A((st-1), i) = zzero; LAPACKE_dlarfg_work( 2, A((st-1), (i-1)), V(i), 1, TAU(i)); /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/ J1 = st; J2 = i-2; t1ed = (J2/NB)*NB; t2st = max(t1ed+1,J1); len1 = t1ed-J1+1; len2 = J2-t2st+1; if(len1>0)CORE_dlarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1,i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) ); if(len2>0)CORE_dlarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) ); CORE_dlarfx2ce(PlasmaUpper, V(i), TAU(i), A((i-1),(i-1)), A((i-1), i), A(i,i)); } /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1*/ for (i = ed; i >= st+1 ; i--){ J1 = i+1; J2 = min(ed,N); t1ed = (J2/NB)*NB; t2st = max(t1ed+1,J1); len1 = t1ed-J1+1; len2 = J2-t2st+1; if(len1>0)CORE_dlarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, (i-1)), A(i, J1 ), ELTLDD(vA, i) ); if(len2>0)CORE_dlarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, (i-1)), A(i, t2st), ELTLDD(vA, i) ); } } /* end of else for the upper case*/ return PLASMA_SUCCESS; }
/***************************************************************************//** * @ingroup CORE_double ******************************************************************************* * * Purpose * ======= * * CORE_dlarfx2c applies a complex elementary reflector H to a diagonal corner * C=[C1, C2, C3], from both the left and the right side. C = H * C * H. * It is used in the case of general matrices, where it create a nnz at the * NEW_NNZ position, then it eliminate it and update the reflector V and TAU. * If PlasmaLower, a left apply is followed by a right apply. * If PlasmaUpper, a right apply is followed by a left apply. * H is represented in the form * * This routine is a special code for a corner C diagonal block C1 NEW_NNZ * C2 C3 * * * H = I - tau * v * v' * * where tau is a complex scalar and v is a complex vector. * * If tau = 0, then H is taken to be the unit matrix * * This version uses inline code if H has order < 11. * * Arguments * ========= * * @param[in] uplo * = PlasmaUpper: Upper triangle of A is stored; * = PlasmaLower: Lower triangle of A is stored. * * @param[in, out] V * On entry, the double complex V in the representation of H. * On exit, the double complex V in the representation of H, * updated by the elimination of the NEW_NNZ created by the * left apply in case of PlasmaLower or the right apply in * case of PlasmaUpper. * * @param[in] TAU * On entry, the value tau in the representation of H. * On exit, the value tau in the representation of H, * updated by the elimination of the NEW_NNZ created by the * left apply in case of PlasmaLower or the right apply in * case of PlasmaUpper. * * @param[in,out] C1 * On entry, the element C1. * On exit, C1 is overwritten by the result H * C * H. * * @param[in,out] C2 * On entry, the element C2. * On exit, C2 is overwritten by the result H * C * H. * * @param[in,out] C3 * On entry, the element C3. * On exit, C3 is overwritten by the result H * C * H. * * ===================================================================== * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************/ int CORE_dlarfx2ce(PLASMA_enum uplo, double *V, double *TAU, double *C1, double *C2, double *C3) { double T2, SUM, TEMP, VIN, TAUIN; /* Quick return */ if (*TAU == (double)0.0) return PLASMA_SUCCESS; /* * Special code for a diagonal block C1 * C2 C3 */ if(uplo==PlasmaLower){ /* * Do the corner for the lower case BIDIAG ==> Left then will * create a new nnz. eliminate it and modify V TAU and then * Right L and R for the 2x2 corner * C(N-1, N-1) C(N-1,N) C1 TEMP * C(N , N-1) C(N ,N) C2 C3 */ VIN = *V; TAUIN = (*TAU); /* Left 1 ==> C1 */ /* C2 */ VIN = (VIN); T2 = TAUIN * (VIN); SUM = *C1 + VIN*(*C2); *C1 = *C1 - SUM*TAUIN; *C2 = *C2 - SUM*T2; /* new nnz at TEMP and update C3 */ SUM = VIN * (*C3); TEMP = - SUM * TAUIN; *C3 = *C3 - SUM * T2; /* generate Householder to annihilate the nonzero created at TEMP */ *V = TEMP; LAPACKE_dlarfg_work( 2, C1, V, 1, TAU); VIN = (*V); TAUIN = (*TAU); /* Right 1 ==> C2 C3 */ /* VIN = VIN */ T2 = TAUIN * (VIN); SUM = *C2 + VIN*(*C3); *C2 = *C2 - SUM*TAUIN; *C3 = *C3 - SUM*T2; }else if(uplo==PlasmaUpper){ /* * Do the corner for the upper case BIDIAG ==> Right then will * create a new nnz. eliminate it and modify V TAU and then * Left * C(N-1, N-1) C(N-1,N) C1 C2 * C(N , N-1) C(N ,N) TEMP C3 * For Left : use (TAU) and V. * For Right: use (TAU) and (V) as input. */ VIN = (*V); TAUIN = (*TAU); /* Right 1 ==> C1 C2 */ /* VIN = VIN */ T2 = TAUIN*(VIN); SUM = *C1 + VIN*(*C2); *C1 = *C1 - SUM*TAUIN; *C2 = *C2 - SUM*T2; /* new nnz at TEMP and update C3 */ SUM = VIN * (*C3); TEMP = - SUM * TAUIN; *C3 = *C3 - SUM * T2; /* generate Householder to annihilate the nonzero created at TEMP */ *V = TEMP; LAPACKE_dlarfg_work( 2, C1, V, 1, TAU); VIN = *V; TAUIN = (*TAU); /* apply from the Left using the NEW V TAU to the remaining 2 elements [C2 C3] */ /* Left 2 ==> C2 */ /* C3 */ VIN = (VIN); T2 = TAUIN*(VIN); SUM = *C2 + VIN*(*C3); *C2 = *C2 - SUM*TAUIN; *C3 = *C3 - SUM*T2; } return PLASMA_SUCCESS; }