/***************************************************************************//** * * @ingroup double_Tile_Async * * PLASMA_dsygst_Tile_Async - reduces a complex Hermitian-definite * generalized eigenproblem to standard form. * If PlasmaItype == 1, the problem is A*x = lambda*B*x, and A is * overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) * If PlasmaItype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x * = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must * have been previously factorized as U**T*U or L*L**T by * PLASMA_DPOTRF. * ONLY PlasmaItype == 1 and PlasmaLower supported! * Non-blocking equivalent of PLASMA_dsygst_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). 
* ******************************************************************************* * * @sa PLASMA_dsygst * @sa PLASMA_dsygst_Tile * @sa PLASMA_chegst_Tile_Async * @sa PLASMA_dsygst_Tile_Async * @sa PLASMA_ssygst_Tile_Async * @sa PLASMA_dsygv_Tile_Async * ******************************************************************************/ int PLASMA_dsygst_Tile_Async(PLASMA_enum itype, PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; PLASMA_desc descB = *B; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_dsygst_Tile", "invalid first descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (plasma_desc_check(&descB) != PLASMA_SUCCESS) { plasma_error("PLASMA_dsygst_Tile", "invalid second descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_dsygst_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* * Transform Hermitian-definite generalized eigenproblem * to standard form */ plasma_dynamic_call_6(plasma_pdsygst, PLASMA_enum, itype, PLASMA_enum, uplo, PLASMA_desc, descA, PLASMA_desc, descB, PLASMA_sequence*, sequence, PLASMA_request*, 
request); return PLASMA_SUCCESS; }
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zlaswp_Tile_Async - performs a series of row interchanges
 *  on the matrix A.  One row interchange is initiated for each of
 *  rows K1 through K2 of A.
 *  Non-blocking equivalent of PLASMA_zlaswp_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          Descriptor of the matrix A. Must be non-NULL and pass
 *          plasma_desc_check().
 *
 * @param[in] K1
 *          First row of the interchange interval. Only K1 == 1 is accepted.
 *
 * @param[in] K2
 *          Last row of the interchange interval. Only K2 == A->m is accepted.
 *
 * @param[in] IPIV
 *          Pivot indices, forwarded unchanged to the swap kernel.
 *
 * @param[in] INCX
 *          Increment between pivot entries, forwarded unchanged to the
 *          swap kernel.
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS the operation was submitted
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval PLASMA_ERR_UNALLOCATED NULL sequence or request
 *          \retval other the value returned by plasma_request_fail()
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlaswp
 * @sa PLASMA_zlaswp_Tile
 * @sa PLASMA_claswp_Tile_Async
 * @sa PLASMA_dlaswp_Tile_Async
 * @sa PLASMA_slaswp_Tile_Async
 * @sa PLASMA_zgetrf_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zlaswp_Tile_Async(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    /* Copied only after validation so a NULL descriptor is reported as an
     * error instead of being dereferenced. */
    PLASMA_desc descA;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (A == NULL || plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    descA = *A;

    /* Only the full row interval 1..M is implemented */
    if ( (K1 != 1) || (K2 != descA.m) ) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid K1 or K2 (1..M is the only interval supported right now)");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* The swap is bracketed by the tl2pnl / pnl2tl barrier kernels */
    plasma_dynamic_call_3(
        plasma_pzbarrier_tl2pnl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    /* swap */
    plasma_dynamic_call_5(
        plasma_pzlaswp,
        PLASMA_desc, descA,
        int *,       IPIV,
        int,         INCX,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    plasma_dynamic_call_3(
        plasma_pzbarrier_pnl2tl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    return PLASMA_SUCCESS;
}
/***************************************************************************//**
 *
 *  plasma_alloc_ibnb_tile - allocates a descriptor and its backing storage
 *  for an (MT*IB)-by-(NT*NB) workspace, where NB and IB are the tile sizes
 *  selected by plasma_tune() for `func` and MT, NT are the numbers of
 *  NB-sized tiles needed to cover M rows and N columns.
 *
 * @param[in] M     Number of rows used for tuning and for computing MT.
 * @param[in] N     Number of columns used for tuning and for computing NT.
 * @param[in] func  Function identifier passed to plasma_tune().
 * @param[in] type  Element type passed to plasma_desc_init().
 * @param[out] desc On success, *desc points to the newly allocated
 *                  descriptor. On a failure after allocation has started,
 *                  everything allocated here is released and *desc is set
 *                  to NULL.
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval PLASMA_ERR_UNALLOCATED desc is NULL
 *          \retval PLASMA_ERR_UNEXPECTED plasma_tune() failed
 *          \retval PLASMA_ERR_OUT_OF_RESOURCES an allocation failed
 *          \retval other the status returned by plasma_desc_check()
 *
 **/
int plasma_alloc_ibnb_tile(int M, int N, PLASMA_enum func, int type, PLASMA_desc **desc)
{
    int status;
    int IB, NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb_tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (desc == NULL) {
        plasma_error("plasma_alloc_ibnb_tile", "NULL descriptor handle");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT & allocate */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        ((func == PLASMA_FUNC_SGELS)  ||
         (func == PLASMA_FUNC_DGELS)  ||
         (func == PLASMA_FUNC_CGELS)  ||
         (func == PLASMA_FUNC_ZGELS)  ||
         (func == PLASMA_FUNC_SGESVD) ||
         (func == PLASMA_FUNC_DGESVD) ||
         (func == PLASMA_FUNC_CGESVD) ||
         (func == PLASMA_FUNC_ZGESVD)))
        NT *= 2;

    /* Allocate and initialize descriptor */
    *desc = malloc(sizeof **desc);
    if (*desc == NULL) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    **desc = plasma_desc_init(type, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

    /* Allocate matrix */
    if (plasma_desc_mat_alloc(*desc)) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        /* Do not leak the descriptor or hand back a half-built one */
        free(*desc);
        *desc = NULL;
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    /* Check that everything is ok */
    status = plasma_desc_check(*desc);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "invalid descriptor");
        /* Release both the matrix storage and the descriptor */
        plasma_desc_mat_free(*desc);
        free(*desc);
        *desc = NULL;
        return status;
    }

    return PLASMA_SUCCESS;
}
/***************************************************************************//** * * @ingroup PLASMA_Complex32_t_Tile_Async * * PLASMA_cpotrf_Tile_Async - Computes the Cholesky factorization of a symmetric * positive definite or Hermitian positive definite matrix. * Non-blocking equivalent of PLASMA_cpotrf_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_cpotrf * @sa PLASMA_cpotrf_Tile * @sa PLASMA_cpotrf_Tile_Async * @sa PLASMA_dpotrf_Tile_Async * @sa PLASMA_spotrf_Tile_Async * @sa PLASMA_cpotrs_Tile_Async * ******************************************************************************/ int PLASMA_cpotrf_Tile_Async(PLASMA_enum uplo, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_cpotrf_Tile", "invalid descriptor"); return 
plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_cpotrf_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_cpotrf_Tile", "illegal value of uplo"); return plasma_request_fail(sequence, request, -1); } /* Quick return */ /* if (max(N, 0) == 0) return PLASMA_SUCCESS; */ plasma_parallel_call_4(plasma_pcpotrf, PLASMA_enum, uplo, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//**
 *
 * @ingroup float_Tile_Async
 *
 *  PLASMA_splgsy_Tile_Async - Generate a random symmetric matrix by tiles.
 *  Non-blocking equivalent of PLASMA_splgsy_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] bump
 *          Value forwarded unchanged to the generation kernel.
 *
 * @param[out] A
 *          Descriptor of the matrix to fill. Must be non-NULL, pass
 *          plasma_desc_check() and use square tiles (mb == nb).
 *
 * @param[in] seed
 *          Seed forwarded unchanged to the generation kernel.
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS the operation was submitted, or the
 *                  matrix is empty and there was nothing to do
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval PLASMA_ERR_UNALLOCATED NULL sequence or request
 *          \retval other the value returned by plasma_request_fail()
 *
 *******************************************************************************
 *
 * @sa PLASMA_splgsy
 * @sa PLASMA_splgsy_Tile
 * @sa PLASMA_cplgsy_Tile_Async
 * @sa PLASMA_dplgsy_Tile_Async
 * @sa PLASMA_splgsy_Tile_Async
 *
 ******************************************************************************/
int PLASMA_splgsy_Tile_Async( float bump, PLASMA_desc *A,
                              unsigned long long int seed,
                              PLASMA_sequence *sequence,
                              PLASMA_request *request)
{
    /* Copied only after validation so a NULL descriptor is reported as an
     * error instead of being dereferenced. */
    PLASMA_desc descA;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (A == NULL || plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_splgsy_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    descA = *A;

    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_splgsy_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Quick return */
    if (min( descA.m, descA.n ) == 0)
        return PLASMA_SUCCESS;

    plasma_parallel_call_5(plasma_psplgsy,
        float, bump,
        PLASMA_desc, descA,
        unsigned long long int, seed,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    return PLASMA_SUCCESS;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zgetrf_nopiv_Tile_Async - Submits the tile LU factorization of a
 *  matrix. This is the non-blocking counterpart of
 *  PLASMA_zgetrf_nopiv_Tile(): the call may return before the computation
 *  is finished, which allows operations to be pipelined at runtime.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix to be factored.
 *          On exit, the tile factors L and U from the factorization.
 *          The descriptor must pass plasma_desc_check() and use square
 *          tiles (mb == nb).
 *
 * @param[in] sequence
 *          The sequence of function calls this call belongs to
 *          (used for completion checks and exception handling).
 *
 * @param[out] request
 *          Identifies this particular call (used for exception handling).
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetrf_nopiv
 * @sa PLASMA_zgetrf_nopiv_Tile
 * @sa PLASMA_cgetrf_nopiv_Tile_Async
 * @sa PLASMA_dgetrf_nopiv_Tile_Async
 * @sa PLASMA_sgetrf_nopiv_Tile_Async
 * @sa PLASMA_zgetrs_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zgetrf_nopiv_Tile_Async(PLASMA_desc *A,
                                   PLASMA_sequence *sequence,
                                   PLASMA_request *request)
{
    plasma_context_t *plasma = plasma_context_self();
    PLASMA_desc descA;

    /* Fatal preconditions: context, sequence and request must all exist */
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* A sequence that is no longer in the success state takes no new work */
    if (sequence->status != PLASMA_SUCCESS)
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
    request->status = PLASMA_SUCCESS;

    /* Validate the caller's descriptor first, then take a private copy */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetrf_nopiv_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    descA = *A;

    /* Tiles have to be square */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_zgetrf_nopiv_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    plasma_dynamic_call_3(plasma_pzgetrf_nopiv,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zgesvd - computes the singular value decomposition (SVD) of a complex
 *  M-by-N matrix A, optionally computing the left and/or right singular
 *  vectors. The SVD is written
 *
 *       A = U * SIGMA * conjugate-transpose(V)
 *
 *  where SIGMA is an M-by-N matrix which is zero except for its
 *  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
 *  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
 *  are the singular values of A; they are real and non-negative, and
 *  are returned in descending order.  The first min(m,n) columns of
 *  U and V are the left and right singular vectors of A.
 *
 *  Note that the routine returns V**H, not V.
 *  Not LAPACK Compliant for now!
 *  Note: Only PlasmaNoVec supported!
 *
 *******************************************************************************
 *
 * @param[in] jobu
 *          Specifies options for computing all or part of the matrix U.
 *          Intended usage:
 *          = PlasmaVec: all M columns of U are returned in array U;
 *          = PlasmaNoVec: no columns of U (no left singular vectors) are
 *                     computed.
 *          Note: Only PlasmaNoVec supported!
 *
 * @param[in] jobvt
 *          Specifies options for computing all or part of the matrix V**H.
 *          Intended usage:
 *          = PlasmaVec: all N rows of V**H are returned in array VT;
 *          = PlasmaNoVec: no rows of V**H (no right singular vectors) are
 *                     computed.
 *          Note: Only PlasmaNoVec supported!
 *
 * @param[in] M
 *          The number of rows of the matrix A. M >= 0.
 *
 * @param[in] N
 *          The number of columns of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix A.
 *          On exit,
 *          if JOBU = 'O',  A is overwritten with the first min(m,n)
 *                          columns of U (the left singular vectors,
 *                          stored columnwise);
 *          if JOBVT = 'O', A is overwritten with the first min(m,n)
 *                          rows of V**H (the right singular vectors,
 *                          stored rowwise);
 *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
 *                          are destroyed.
* * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[out] S * The double precision singular values of A, sorted so that S(i) >= S(i+1). * * @param[out] U * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. * If JOBU = 'A', U contains the M-by-M unitary matrix U; * if JOBU = 'S', U contains the first min(m,n) columns of U * (the left singular vectors, stored columnwise); * if JOBU = 'N' or 'O', U is not referenced. * * @param[in] LDU * The leading dimension of the array U. LDU >= 1; if * JOBU = 'S' or 'A', LDU >= M. * * @param[out] VT * If JOBVT = 'A', VT contains the N-by-N unitary matrix * V**H; * if JOBVT = 'S', VT contains the first min(m,n) rows of * V**H (the right singular vectors, stored rowwise); * if JOBVT = 'N' or 'O', VT is not referenced. * * @param[in] LDVT * The leading dimension of the array VT. LDVT >= 1; if * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). * * @param[in, out] descT * On entry, descriptor as return by PLASMA_Alloc_Workspace_zgesvd * On exit, contains auxiliary factorization data. 
* * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zgesvd_Tile * @sa PLASMA_zgesvd_Tile_Async * @sa PLASMA_cgesvd * @sa PLASMA_dgesvd * @sa PLASMA_sgesvd * ******************************************************************************/ int PLASMA_zgesvd(PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex64_t *A, int LDA, double *S, PLASMA_Complex64_t *U, int LDU, PLASMA_Complex64_t *VT, int LDVT, PLASMA_desc *descT) { int NB, IB, IBNB, minMN, MT, NT, minMTNT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descU, descVT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgesvd", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on M & N; Set NBNB */ status = plasma_tune(PLASMA_FUNC_ZGESVD, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zgesvd", "plasma_tune() failed"); return status; } /* Set MT, NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? 
(N/NB) : (N/NB+1); minMN = min(M,N); minMTNT = min(MT,NT); /* Check input arguments */ if (jobu != PlasmaNoVec && jobu !=PlasmaVec) { plasma_error("PLASMA_zgesvd", "illegal value of jobu"); return -1; } if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) { plasma_error("PLASMA_zgesvd", "illegal value of jobvt"); return -2; } if (M < 0) { plasma_error("PLASMA_zgesvd", "illegal value of M"); return -3; } if (N < 0) { plasma_error("PLASMA_zgesvd", "illegal value of N"); return -4; } if (LDA < max(1, M)) { plasma_error("PLASMA_zgesvd", "illegal value of LDA"); return -6; } if (LDU < 1) { plasma_error("PLASMA_zgesvd", "illegal value of LDU"); return -9; } if (LDVT < 1) { plasma_error("PLASMA_zgesvd", "illegal value of LDVT"); return -11; } if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || ( descT->m != MT*IB ) || (descT->n != NT*NB) ) { plasma_error("PLASMA_zgesvd", "invalid T descriptor"); return -12; } /* Quick return */ if (min(M, N) == 0) { return PLASMA_SUCCESS; } if (jobu == PlasmaVec) { plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version"); return -1; } if (jobvt == PlasmaVec) { plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version"); return -2; } plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) ); if (jobu == PlasmaVec){ plasma_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU))); }
/***************************************************************************//**
 *
 * @ingroup float
 *
 *  PLASMA_ssytrd - reduces a real symmetric matrix A (complex Hermitian in
 *  the complex variants) to real symmetric tridiagonal form S using a
 *  two-stage approach
 *  First stage: reduction to band tridiagonal form (unitary Q1);
 *  Second stage: reduction from band to tridiagonal form (unitary
 *  Q2).  Let Q = Q1 * Q2 be the global unitary transformation; Q**T *
 *  A * Q = S.
 *  Not LAPACK compliant as A does not contain the T elements
 *  Note: Only PlasmaNoVec supported!
 *
 *******************************************************************************
 *
 * @param[in] jobz
 *          Intended usage:
 *          = PlasmaNoVec: computes eigenvalues only;
 *          = PlasmaVec: computes eigenvalues and eigenvectors.
 *          Note: Only PlasmaNoVec supported!
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or
 *          lower triangular:
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the symmetric (or Hermitian) matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular
 *          part of A contains the upper triangular part of the matrix
 *          A, and the strictly lower triangular part of A is not
 *          referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular
 *          part of A contains the lower triangular part of the matrix
 *          A, and the strictly upper triangular part of A is not
 *          referenced.
 *          On exit, the lower triangle (if uplo = PlasmaLower) or the
 *          upper triangle (if uplo = PlasmaUpper) of A, including the
 *          diagonal, is destroyed.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[out] D
 *          On exit, the diagonal elements of the tridiagonal matrix:
 *          D(i) = A(i,i).
 *
 * @param[out] E
 *          On exit, the off-diagonal elements of the tridiagonal matrix:
 *          E(i) = A(i,i+1) if uplo = PlasmaUpper,
 *          E(i) = A(i+1,i) if uplo = PlasmaLower.
 *
 * @param[in, out] descT
 *          On entry, descriptor as returned by PLASMA_Alloc_Workspace_ssyev.
 *          It must be non-NULL and sized NT*IB by NT*NB for the tuned tile
 *          sizes. On exit, contains auxiliary factorization data.
 *
 * @param[out] Q
 *          On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
 *
 * @param[in] LDQ
 *          The leading dimension of the array Q. LDQ >= max(1,N).
 *          This is checked regardless of jobz.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *          \retval >0 if INFO = i, the algorithm failed to converge; i
 *               off-diagonal elements of an intermediate tridiagonal
 *               form did not converge to zero.
 *
 *******************************************************************************
 *
 * @sa PLASMA_ssytrd_Tile
 * @sa PLASMA_ssytrd_Tile_Async
 * @sa PLASMA_chetrd
 * @sa PLASMA_dsytrd
 * @sa PLASMA_ssytrd
 *
 ******************************************************************************/
int PLASMA_ssytrd(PLASMA_enum jobz, PLASMA_enum uplo, int N,
                  float *A, int LDA,
                  float *D,
                  float *E,
                  PLASMA_desc *descT,
                  float *Q, int LDQ)
{
    int NB, IB, IBNB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_ssytrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_SSYTRD, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ssytrd", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    NT   = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_ssytrd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_ssytrd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_ssytrd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ssytrd", "illegal value of LDA");
        return -5;
    }
    /* The NULL test comes first so that descT->m / descT->n are never read
     * through a NULL pointer. */
    if ( (descT == NULL) ||
         (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_ssytrd", "invalid T descriptor");
        return -8;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_ssytrd", "illegal value of LDQ");
        return -10;
    }
    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_ssytrd", "computing the eigenvectors is not supported in this version");
        return -1;
    }

    plasma_sequence_create(plasma, &sequence);

    /*
     * NOTE(review): the lapack<->tile translation helpers below look like
     * macros and may expand to more than one statement (their definitions
     * are not visible here). Every conditional use is therefore braced so
     * that the whole expansion is guarded by the jobz test.
     */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            plasma_sooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N , plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_siplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            plasma_siplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N );
        }
    }

    /* Call the tile interface; descQ is only read there when jobz == PlasmaVec */
    PLASMA_ssytrd_Tile_Async(jobz, uplo, &descA, D, E, descT, &descQ, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_sooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_sooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec) {
            plasma_desc_mat_free(&descQ);
        }
    } else {
        plasma_siptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_siptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
/***************************************************************************//** * * @ingroup float_Tile_Async * * PLASMA_ssytrd_Tile_Async - Computes all eigenvalues and, * optionally, eigenvectors of a complex Hermitian matrix A using a * two-stage approach: * First stage: reduction to band tridiagonal form; * Second stage: reduction from band to tridiagonal form. * * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_ssytrd * @sa PLASMA_ssytrd_Tile * @sa PLASMA_chetrd_Tile_Async * @sa PLASMA_dsytrd_Tile_Async * @sa PLASMA_ssytrd_Tile_Async * ******************************************************************************/ int PLASMA_ssytrd_Tile_Async(PLASMA_enum jobz, PLASMA_enum uplo, PLASMA_desc *A, float *D, float *E, PLASMA_desc *T, PLASMA_desc *Q, PLASMA_sequence *sequence, PLASMA_request *request) { int NB, IB, IBNB, NT; PLASMA_desc descA = *A; PLASMA_desc descT = *T; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Set NT & NTRHS */ NB = 
PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (descA.ln%NB==0) ? (descA.ln/NB) : (descA.ln/NB+1); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (plasma_desc_check(&descT) != PLASMA_SUCCESS) { plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if ( (jobz == PlasmaVec) && (plasma_desc_check(Q) != PLASMA_SUCCESS) ) { plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (jobz != PlasmaNoVec && jobz != PlasmaVec) { plasma_error("PLASMA_ssytrd_Tile_Async", "illegal value of jobz"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (descA.m != descA.n) { plasma_error("PLASMA_ssytrd_Tile_Async", "matrix need to be square"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (descA.nb != descA.mb) { plasma_error("PLASMA_ssytrd_Tile_Async", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (jobz == PlasmaVec) { plasma_error("PLASMA_ssytrd_Tile_Async", "computing the eigenvectors is not supported in this version"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if ( (jobz == PlasmaVec) && (Q->nb != Q->mb) ) { plasma_error("PLASMA_ssytrd_Tile_Async", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Reduction to tridiagonal form * with a two-stage approach. 
*/ /* Reduction to BAND tridiagonal form */ plasma_dynamic_call_5(plasma_pssyrbt, PLASMA_enum, uplo, PLASMA_desc, descA, PLASMA_desc, descT, PLASMA_sequence*, sequence, PLASMA_request*, request); /* * Build the Q of the first stage */ /* if (jobz == PlasmaVec){ */ /* /\* Initialize Q to Identity *\/ */ /* plasma_dynamic_call_6(plasma_pslaset, */ /* PLASMA_enum, PlasmaUpperLower, */ /* float, 0.0, */ /* float, 1.0, */ /* PLASMA_desc, descQ, */ /* PLASMA_sequence*, sequence, */ /* PLASMA_request*, request); */ /* /\* Accumulate the transformations from the first stage*\/ */ /* plasma_dynamic_call_6(plasma_psorgtr, */ /* PLASMA_enum, uplo, */ /* PLASMA_desc, descA, */ /* PLASMA_desc, descQ, */ /* PLASMA_desc, descT, */ /* PLASMA_sequence*, sequence, */ /* PLASMA_request*, request); */ /* } */ /* Set the V's to zero before the 2nd stage (bulge chasing) */ /* */ plasma_dynamic_call_5(plasma_pslaset2, PLASMA_enum, uplo, float, 0.0, PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) : plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb), PLASMA_sequence*, sequence, PLASMA_request*, request); /* Reduction from BAND tridiagonal to the final condensed form */ plasma_dynamic_call_7(plasma_pssbrdt, PLASMA_enum, uplo, PLASMA_desc, descA, float*, D, float*, E, PLASMA_desc, descT, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup float * * PLASMA_ssygv - Computes all eigenvalues and, optionally, * eigenvectors of a complex generalized Hermitian-definite * eigenproblem of the form: A*x=(lambda)*B*x, A*Bx=(lambda)*x, or * B*A*x=(lambda)*x. * Here A and B are assumed to be Hermitian and B is also positive * definite. * Note: Only PlasmaNoVec supported! * ******************************************************************************* * * @param[in] PlasmaItype * Intended usage: * = 1: A*x=(lambda)*B*x * = 2: A*Bx=(lambda)*x * = 3: B*A*x=(lambda)*x * * @param[in] jobz * Intended usage: * = PlasmaNoVec: computes eigenvalues only; * = PlasmaVec: computes eigenvalues and eigenvectors. * Note: Only PlasmaNoVec supported! * * @param[in] uplo * Specifies whether the matrix A is upper triangular or * lower triangular: * = PlasmaUpper: Upper triangle of A and B are stored; * = PlasmaLower: Lower triangle of A and B are stored. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[in,out] A * On entry, the symmetric (or Hermitian) matrix A. * If uplo = PlasmaUpper, the leading N-by-N upper triangular * part of A contains the upper triangular part of the matrix * A, and the strictly lower triangular part of A is not * referenced. * If uplo = PlasmaLower, the leading N-by-N lower triangular * part of A contains the lower triangular part of the matrix * A, and the strictly upper triangular part of A is not * referenced. * On exit, if jobz = PlasmaVec, then if return value = 0, A * contains the matrix Z of eigenvectors. * The eigenvectors are normalized as follows: * if ITYPE = 1 or 2, Z**T*B*Z = I; * if ITYPE = 3, Z**T*inv(B)*Z = I. * If jobz = PlasmaNoVec, then on exit the lower triangle (if * uplo = PlasmaLower) or the upper triangle (if uplo = * PlasmaUpper) of A, including the diagonal, is destroyed. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). 
* * @param[in,out] B * On entry, the symmetric (or Hermitian) positive definite * matrix B. * If uplo = PlasmaUpper, the leading N-by-N upper triangular * part of B contains the upper triangular part of the matrix * B, and the strictly lower triangular part of B is not * referenced. * If uplo = PlasmaLower, the leading N-by-N lower triangular * part of B contains the lower triangular part of the matrix * B, and the strictly upper triangular part of B is not * referenced. * On exit, if return value <= N, the part of B containing * the matrix is overwritten by the triangular factor U or L * from the Cholesky factorization B = U**T*U or B = L*L**T. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). * * @param[out] W * On exit, if info = 0, the eigenvalues. * * @param[in, out] descT * On entry, descriptor as returned by PLASMA_Alloc_Workspace_ssygv. * On exit, contains auxiliary factorization data. * * @param[out] Q * On exit, if jobz = PlasmaVec and info = 0, the eigenvectors. * * @param[in] LDQ * The leading dimension of Q. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * \retval <=N if INFO = i, plasma_ssygv failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. * \retval >N if INFO = N + i, for 1 <= i <= N, then the leading * minor of order i of B is not positive definite. * The factorization of B could not be completed and * no eigenvalues or eigenvectors were computed.
* ******************************************************************************* * * @sa PLASMA_ssygv_Tile * @sa PLASMA_ssygv_Tile_Async * @sa PLASMA_chegv * @sa PLASMA_dsygv * @sa PLASMA_ssygv * ******************************************************************************/ int PLASMA_ssygv(PLASMA_enum itype, PLASMA_enum jobz, PLASMA_enum uplo, int N, float *A, int LDA, float *B, int LDB, float *W, PLASMA_desc *descT, float *Q, int LDQ) { int NB, IB, IBNB, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB, descQ; plasma = plasma_context_self(); if (plasma == NULL) { plasma_error("PLASMA_ssygv", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on N; Set NBNB */ status = plasma_tune(PLASMA_FUNC_SSYGV, N, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_ssygv", "plasma_tune() failed"); return status; } /* Set NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (N%NB==0) ? 
(N/NB) : (N/NB+1); /* Check input arguments */ if (itype != 1 && itype != 2 && itype != 3) { plasma_error("PLASMA_ssygv", "Illegal value of itype"); return -1; } if (jobz != PlasmaNoVec && jobz != PlasmaVec) { plasma_error("PLASMA_ssygv", "illegal value of jobz"); return -2; } if (uplo != PlasmaLower && uplo!= PlasmaUpper) { plasma_error("PLASMA_ssygv", "only PlasmaLower supported"); return -3; } if (N < 0) { plasma_error("PLASMA_ssygv", "illegal value of N"); return -4; } if (LDA < max(1, N)) { plasma_error("PLASMA_ssygv", "illegal value of LDA"); return -6; } if (LDB < max(1, N)) { plasma_error("PLASMA_ssygv", "illegal value of LDB"); return -8; } if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || ( descT->m != NT*IB ) || (descT->n != NT*NB) ) { plasma_error("PLASMA_ssygv", "invalid T descriptor"); return -10; } if (LDQ < max(1, N)) { plasma_error("PLASMA_ssygv", "illegal value of LDQ"); return -12; } /* Quick return */ if (N == 0) return PLASMA_SUCCESS; if (jobz == PlasmaVec) { plasma_error("PLASMA_ssygv", "computing the eigenvectors is not supported in this version"); return -1; } plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) ); plasma_sooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) ); if (jobz == PlasmaVec) { /* No need for conversion, it's just output */ plasma_sdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descQ)) ); }
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t_Tile_Async * * PLASMA_zgetri_Tile_Async - Computes the inverse of a matrix using the LU * factorization computed by PLASMA_zgetrf. * This method inverts U and then computes inv(A) by solving the system * inv(A)*L = inv(U) for inv(A). * Non-blocking equivalent of PLASMA_zgetri_Tile(). * May return before the computation is finished. * Allows for pipelining of operations at runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_zgetri * @sa PLASMA_zgetri_Tile * @sa PLASMA_cgetri_Tile_Async * @sa PLASMA_dgetri_Tile_Async * @sa PLASMA_sgetri_Tile_Async * @sa PLASMA_zgetrf_Tile_Async * ******************************************************************************/ int PLASMA_zgetri_Tile_Async(PLASMA_desc *A, int *IPIV, PLASMA_desc *W, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA; PLASMA_desc descW; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgetri_Tile_Async", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if 
(plasma_desc_check(A) != PLASMA_SUCCESS) { plasma_error("PLASMA_zgetri_Tile_Async", "invalid A descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } else { descA = *A; } /* Check descriptors for correctness */ if (plasma_desc_check(W) != PLASMA_SUCCESS) { plasma_error("PLASMA_zgetri_Tile_Async", "invalid W descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } else { descW = *W; } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_zgetri_Tile_Async", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Quick return */ if (max(descA.m, 0) == 0) return PLASMA_SUCCESS; plasma_dynamic_call_5(plasma_pztrtri, PLASMA_enum, PlasmaUpper, PLASMA_enum, PlasmaNonUnit, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); plasma_dynamic_call_9(plasma_pztrsmrv, PLASMA_enum, PlasmaRight, PLASMA_enum, PlasmaLower, PLASMA_enum, PlasmaNoTrans, PLASMA_enum, PlasmaUnit, PLASMA_Complex64_t, (PLASMA_Complex64_t) 1.0, PLASMA_desc, descA, PLASMA_desc, descW, PLASMA_sequence*, sequence, PLASMA_request*, request); /* No need for barrier tile2row because of previous dependencies */ /* swap */ plasma_dynamic_call_5( plasma_pzlaswpc, PLASMA_desc, descA, int *, IPIV, int, -1, PLASMA_sequence*, sequence, PLASMA_request*, request); plasma_dynamic_call_3( plasma_pzbarrier_row2tl, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t_Tile_Async * * PLASMA_zlansy_Tile_Async - Non-blocking equivalent of PLASMA_zlansy_Tile(). * May return before the computation is finished. * Allows for pipelining of operations at runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_zlansy * @sa PLASMA_zlansy_Tile * @sa PLASMA_clansy_Tile_Async * @sa PLASMA_dlansy_Tile_Async * @sa PLASMA_slansy_Tile_Async * ******************************************************************************/ int PLASMA_zlansy_Tile_Async(PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc *A, double *value, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA; double *work = NULL; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zlansy_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_zlansy_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_zlansy_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(A) != PLASMA_SUCCESS) { plasma_error("PLASMA_zlansy_Tile", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } else { descA = *A; } /* Check input arguments */ if 
(descA.nb != descA.mb) { plasma_error("PLASMA_zlansy_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm) && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) { plasma_error("PLASMA_zlansy_Tile", "illegal value of norm"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) { plasma_error("PLASMA_zlansy_Tile", "illegal value of uplo"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Quick return */ if ( descA.m == 0) { *value = 0.0; return PLASMA_SUCCESS; } if (PLASMA_SCHEDULING == PLASMA_STATIC_SCHEDULING) { if (norm == PlasmaFrobeniusNorm) { work = plasma_shared_alloc(plasma, 2*PLASMA_SIZE, PlasmaRealDouble ); } else { work = plasma_shared_alloc(plasma, PLASMA_SIZE, PlasmaRealDouble ); } } plasma_parallel_call_7(plasma_pzlansy, PLASMA_enum, norm, PLASMA_enum, uplo, PLASMA_desc, descA, double*, work, double*, value, PLASMA_sequence*, sequence, PLASMA_request*, request); if (work != NULL) plasma_shared_free( plasma, work ); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup float_Tile_Async * * PLASMA_sgelqf_Tile_Async - Computes the tile LQ factorization of a matrix. * Non-blocking equivalent of PLASMA_sgelqf_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_sgelqf * @sa PLASMA_sgelqf_Tile * @sa PLASMA_cgelqf_Tile_Async * @sa PLASMA_dgelqf_Tile_Async * @sa PLASMA_sgelqf_Tile_Async * @sa PLASMA_sgelqs_Tile_Async * ******************************************************************************/ int PLASMA_sgelqf_Tile_Async(PLASMA_desc *A, PLASMA_desc *T, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; PLASMA_desc descT = *T; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_sgelqf_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_sgelqf_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_sgelqf_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_sgelqf_Tile", "invalid first descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if 
(plasma_desc_check(&descT) != PLASMA_SUCCESS) { plasma_error("PLASMA_sgelqf_Tile", "invalid second descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_sgelqf_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Quick return */ /* if (min(M, N) == 0) return PLASMA_SUCCESS; */ if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { plasma_parallel_call_4(plasma_psgelqf, PLASMA_desc, descA, PLASMA_desc, descT, PLASMA_sequence*, sequence, PLASMA_request*, request); } else { plasma_dynamic_call_5(plasma_psgelqfrh, PLASMA_desc, descA, PLASMA_desc, descT, PLASMA_enum, PLASMA_RHBLK, PLASMA_sequence*, sequence, PLASMA_request*, request); } return PLASMA_SUCCESS; }