lapack_int LAPACKE_zlarft( int matrix_layout, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* tau, lapack_complex_double* t, lapack_int ldt ) { lapack_int ncols_v, nrows_v; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zlarft", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ ncols_v = LAPACKE_lsame( storev, 'c' ) ? k : ( LAPACKE_lsame( storev, 'r' ) ? n : 1); nrows_v = LAPACKE_lsame( storev, 'c' ) ? n : ( LAPACKE_lsame( storev, 'r' ) ? k : 1); if( LAPACKE_z_nancheck( k, tau, 1 ) ) { return -8; } if( LAPACKE_zge_nancheck( matrix_layout, nrows_v, ncols_v, v, ldv ) ) { return -6; } #endif return LAPACKE_zlarft_work( matrix_layout, direct, storev, n, k, v, ldv, tau, t, ldt ); }
int CORE_zgeqrt(int M, int N, int IB, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *TAU, PLASMA_Complex64_t *WORK) { int i, k, sb; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDA < max(1,M)) && (M > 0)) { coreblas_error(5, "Illegal value of LDA"); return -5; } if ((LDT < max(1,IB)) && (IB > 0)) { coreblas_error(7, "Illegal value of LDT"); return -7; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; k = min(M, N); for(i = 0; i < k; i += IB) { sb = min(IB, k-i); LAPACKE_zgeqr2_work(LAPACK_COL_MAJOR, M-i, sb, &A[LDA*i+i], LDA, &TAU[i], WORK); LAPACKE_zlarft_work(LAPACK_COL_MAJOR, lapack_const(PlasmaForward), lapack_const(PlasmaColumnwise), M-i, sb, &A[LDA*i+i], LDA, &TAU[i], &T[LDT*i], LDT); if (N > i+sb) { LAPACKE_zlarfb_work( LAPACK_COL_MAJOR, lapack_const(PlasmaLeft), lapack_const(PlasmaConjTrans), lapack_const(PlasmaForward), lapack_const(PlasmaColumnwise), M-i, N-i-sb, sb, &A[LDA*i+i], LDA, &T[LDT*i], LDT, &A[LDA*(i+sb)+i], LDA, WORK, N-i-sb); } } return PLASMA_SUCCESS; }
void plasma_pzlarft_blgtrd(plasma_context_t *plasma) { int my_core_id = PLASMA_RANK; int cores_num = plasma->world_size; /*===========================*/ int N, NB,Vblksiz; PLASMA_Complex64_t *V; PLASMA_Complex64_t *T; PLASMA_Complex64_t *TAU; PLASMA_sequence *sequence; PLASMA_request *request; /*=========================== * local variables *===========================*/ int LDT, LDV; int Vm, Vn, mt, nt; int myrow, mycol, blkj, blki; int firstrow; int blkid,vpos,taupos,tpos; int blkpercore,blkcnt, myid; plasma_unpack_args_8(N, NB, Vblksiz, V, T, TAU, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; /* Quick return */ if (N == 0){ return; } if (NB == 0){ return; } if (NB == 1){ return; } findVTsiz(N, NB, Vblksiz, &blkcnt, &LDV); blkpercore = blkcnt/cores_num; blkpercore = blkpercore==0 ? 1:blkpercore; LDT = Vblksiz; LDV = NB+Vblksiz-1; /*======================================== * compute the T's in parallel. * The Ts are independent so each core pick * a T and compute it. The loop is based on * the version 113 of the pzunmqr_blgtrd.c * which go over the losange block_column * by block column. but it is not important * here the order because Ts are independent. * ======================================== */ nt = plasma_ceildiv((N-1),Vblksiz); for (blkj=nt-1; blkj>=0; blkj--) { /* the index of the first row on the top of block (blkj) */ firstrow = blkj * Vblksiz + 1; /*find the number of tile for this block */ if( blkj == nt-1 ) mt = plasma_ceildiv( N - firstrow, NB); else mt = plasma_ceildiv( N - (firstrow+1), NB); /*loop over the tiles find the size of the Vs and apply it */ for (blki=mt; blki>0; blki--) { /*calculate the size of each losange of Vs= (Vm,Vn)*/ myrow = firstrow + (mt-blki)*NB; mycol = blkj*Vblksiz; Vm = min( NB+Vblksiz-1, N-myrow); if( ( blkj == nt-1 ) && ( blki == mt ) ){ Vn = min (Vblksiz, Vm); } else { Vn = min (Vblksiz, Vm-1); } /*calculate the pointer to the Vs and the Ts. * Note that Vs and Ts have special storage done * by the bulgechasing function*/ findVTpos(N,NB,Vblksiz,mycol,myrow, &vpos, &taupos, &tpos, &blkid); myid = blkid/blkpercore; if( my_core_id==(myid%cores_num) ){ if( ( Vm > 0 ) && ( Vn > 0 ) ){ LAPACKE_zlarft_work(LAPACK_COL_MAJOR, lapack_const(PlasmaForward), lapack_const(PlasmaColumnwise), Vm, Vn, V(vpos), LDV, TAU(taupos), T(tpos), LDT); } } } } }