/***************************************************************************//**
 *
 *  plasma_alloc_ibnb_tile - Allocates a descriptor and the matrix storage
 *  attached to it, dimensioned from the IB and NB values in effect after
 *  plasma_tune() has been called for the routine `func`.
 *
 *******************************************************************************
 *
 * @param[in] M
 *          Row dimension passed to plasma_tune(); also used to derive the
 *          number of tile rows MT (M/NB rounded up).
 *
 * @param[in] N
 *          Column dimension passed to plasma_tune(); also used to derive the
 *          number of tile columns NT (N/NB rounded up).
 *
 * @param[in] func
 *          Identifier of the routine the workspace is meant for. For the
 *          GELS and GESVD identifiers NT is doubled when the context is not
 *          using flat Householder reduction.
 *
 * @param[in] type
 *          Element type forwarded to plasma_desc_init().
 *
 * @param[out] desc
 *          On success, *desc points to a malloc'ed, initialized descriptor
 *          whose matrix has been allocated by plasma_desc_mat_alloc().
 *          If a failure occurs once the descriptor has been allocated, every
 *          resource acquired here is released and *desc is set to NULL.
 *          *desc is left untouched when the context lookup or the tuning
 *          step fails.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context is available
 *          \retval PLASMA_ERR_UNEXPECTED plasma_tune() failed
 *          \retval PLASMA_ERR_OUT_OF_RESOURCES an allocation failed
 *          \retval other the status reported by plasma_desc_check()
 *
 ******************************************************************************/
int plasma_alloc_ibnb_tile(int M, int N, PLASMA_enum func, int type, PLASMA_desc **desc)
{
    int status;
    int IB, NB, MT, NT;
    /* NOTE(review): PLASMA_NB / PLASMA_IB presumably read the tuned values
     * through this local variable -- keep its name; confirm in the headers. */
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb_tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT: number of NB-sized tiles needed to cover M and N */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        ((func == PLASMA_FUNC_SGELS)  ||
         (func == PLASMA_FUNC_DGELS)  ||
         (func == PLASMA_FUNC_CGELS)  ||
         (func == PLASMA_FUNC_ZGELS)  ||
         (func == PLASMA_FUNC_SGESVD) ||
         (func == PLASMA_FUNC_DGESVD) ||
         (func == PLASMA_FUNC_CGESVD) ||
         (func == PLASMA_FUNC_ZGESVD)))
        NT *= 2;

    /* Allocate and initialize descriptor */
    *desc = malloc(sizeof **desc);
    if (*desc == NULL) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    **desc = plasma_desc_init(type, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

    /* Allocate matrix */
    if (plasma_desc_mat_alloc(*desc)) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        /* Do not leak the descriptor, and do not hand a half-built one back. */
        free(*desc);
        *desc = NULL;
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    /* Check that everything is ok */
    status = plasma_desc_check(*desc);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "invalid descriptor");
        /* Release the matrix first, then the descriptor that owns it. */
        plasma_desc_mat_free(*desc);
        free(*desc);
        *desc = NULL;
        return status;
    }

    return PLASMA_SUCCESS;
}
/***************************************************************************//** * * @ingroup float * * PLASMA_sgesv - Computes the solution to a system of linear equations A * X = B, * where A is an N-by-N matrix and X and B are N-by-NRHS matrices. * The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. * The factored form of A is then used to solve the system of equations A * X = B. * ******************************************************************************* * * @param[in] N * The number of linear equations, i.e., the order of the matrix A. N >= 0. * * @param[in] NRHS * The number of right hand sides, i.e., the number of columns of the matrix B. * NRHS >= 0. * * @param[in,out] A * On entry, the N-by-N coefficient matrix A. * On exit, the tile L and U factors from the factorization. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). * * @param[out] IPIV * On exit, the pivot indices that define the permutations. * * @param[in,out] B * On entry, the N-by-NRHS matrix of right hand side matrix B. * On exit, if return value = 0, the N-by-NRHS solution matrix X. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. 
* ******************************************************************************* * * @sa PLASMA_sgesv_Tile * @sa PLASMA_sgesv_Tile_Async * @sa PLASMA_cgesv * @sa PLASMA_dgesv * @sa PLASMA_sgesv * ******************************************************************************/ int PLASMA_sgesv(int N, int NRHS, float *A, int LDA, int *IPIV, float *B, int LDB) { int NB, IB, IBNB, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB; plasma = plasma_context_self(); if (plasma == NULL) { plasma_error("PLASMA_sgesv", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (N < 0) { plasma_error("PLASMA_sgesv", "illegal value of N"); return -1; } if (NRHS < 0) { plasma_error("PLASMA_sgesv", "illegal value of NRHS"); return -2; } if (LDA < max(1, N)) { plasma_error("PLASMA_sgesv", "illegal value of LDA"); return -4; } if (LDB < max(1, N)) { plasma_error("PLASMA_sgesv", "illegal value of LDB"); return -8; } /* Quick return */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_SGESV, N, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_sgesv", "plasma_tune() failed"); return status; } /* Set NT & NTRHS */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (N%NB==0) ? (N/NB) : (N/NB+1); plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) ); plasma_sooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive * definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky * factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf. * ******************************************************************************* * * @param[in] uplo * = PlasmaUpper: Upper triangle of A is stored; * = PlasmaLower: Lower triangle of A is stored. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[in] NRHS * The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. * * @param[in] A * The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, * computed by PLASMA_zpotrf. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). * * @param[in,out] B * On entry, the N-by-NRHS right hand side matrix B. * On exit, if return value = 0, the N-by-NRHS solution matrix X. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zpotrs_Tile * @sa PLASMA_zpotrs_Tile_Async * @sa PLASMA_cpotrs * @sa PLASMA_dpotrs * @sa PLASMA_spotrs * @sa PLASMA_zpotrf * ******************************************************************************/ int PLASMA_zpotrs(PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB) { int NB; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zpotrs", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_zpotrs", "illegal value of uplo"); return -1; } if (N < 0) { plasma_error("PLASMA_zpotrs", "illegal value of N"); return -2; } if (NRHS < 0) { plasma_error("PLASMA_zpotrs", "illegal value of NRHS"); return -3; } if (LDA < max(1, N)) { plasma_error("PLASMA_zpotrs", "illegal value of LDA"); return -5; } if (LDB < max(1, N)) { plasma_error("PLASMA_zpotrs", "illegal value of LDB"); return -7; } /* Quick return */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zpotrs", "plasma_tune() failed"); return status; } /* Set NT & NTRHS */ NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) ); plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, 
plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup float * * PLASMA_splgsy - Generate a random hermitian matrix by tiles. * ******************************************************************************* * * @param[in] bump * The value to add to the diagonal to be sure * to have a positive definite matrix. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[out] A * On exit, The random hermitian matrix A generated. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[in] seed * The seed used in the random generation. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_splgsy_Tile * @sa PLASMA_splgsy_Tile_Async * @sa PLASMA_cplgsy * @sa PLASMA_dplgsy * @sa PLASMA_splgsy * @sa PLASMA_splrnt * @sa PLASMA_splgsy * ******************************************************************************/ int PLASMA_splgsy( float bump, int N, float *A, int LDA, unsigned long long int seed ) { int NB; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_splgsy", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (N < 0) { plasma_error("PLASMA_splgsy", "illegal value of N"); return -2; } if (LDA < max(1, N)) { plasma_error("PLASMA_splgsy", "illegal value of LDA"); return -4; } /* Quick return */ if (max(0, N) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_SGEMM, N, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_splgsy", "plasma_tune() failed"); return status; } /* Set NT */ 
NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); descA = plasma_desc_init( PlasmaRealFloat, NB, NB, NB*NB, LDA, N, 0, 0, N, N); descA.mat = A; /* Call the tile interface */ PLASMA_splgsy_Tile_Async( bump, &descA, seed, sequence, &request ); plasma_siptile2lap( descA, A, NB, NB, LDA, N ); plasma_dynamic_sync(); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//**
 *
 *  plasma_alloc_ibnb - Allocates a raw workspace of MT*NT tiles of IB*NB
 *  elements each, where IB and NB are the values in effect after
 *  plasma_tune() has been called for the routine `func`.
 *
 *******************************************************************************
 *
 * @param[in] M
 *          Row dimension passed to plasma_tune(); also used to derive the
 *          number of tile rows MT (M/NB rounded up).
 *
 * @param[in] N
 *          Column dimension passed to plasma_tune(); also used to derive the
 *          number of tile columns NT (N/NB rounded up).
 *
 * @param[in] func
 *          Identifier of the routine the workspace is meant for. For the
 *          GELS and GESVD identifiers NT is doubled when the context is not
 *          using flat Householder reduction.
 *
 * @param[in] type
 *          Element type; its size is obtained from plasma_element_size().
 *
 * @param[out] memptr
 *          On success, *memptr receives the malloc'ed workspace, or NULL when
 *          the workspace is empty. The caller releases it with free().
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit (including an empty workspace)
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context is available
 *          \retval PLASMA_ERR_UNEXPECTED plasma_tune() failed
 *          \retval PLASMA_ERR_OUT_OF_RESOURCES malloc() failed
 *
 ******************************************************************************/
int plasma_alloc_ibnb(int M, int N, PLASMA_enum func, int type, void **memptr)
{
    size_t size;
    int status;
    int IB, NB, MT, NT;
    /* NOTE(review): PLASMA_NB / PLASMA_IB presumably read the tuned values
     * through this local variable -- keep its name; confirm in the headers. */
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT: number of NB-sized tiles needed to cover M and N */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        (func == PLASMA_FUNC_SGELS  ||
         func == PLASMA_FUNC_DGELS  ||
         func == PLASMA_FUNC_CGELS  ||
         func == PLASMA_FUNC_ZGELS  ||
         func == PLASMA_FUNC_SGESVD ||
         func == PLASMA_FUNC_DGESVD ||
         func == PLASMA_FUNC_CGESVD ||
         func == PLASMA_FUNC_ZGESVD ))
        NT *= 2;

    /* A non-positive tile count means an empty workspace. The test has to be
     * done on the signed values: once converted to size_t a negative count
     * wraps to a huge number, which an unsigned "size <= 0" cannot detect. */
    if (MT <= 0 || NT <= 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }

    size = (size_t)MT*NT*IB*NB * plasma_element_size(type);
    if (size == 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }

    *memptr = malloc(size);
    if (*memptr == NULL) {
        plasma_error("plasma_alloc_ibnb", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    return PLASMA_SUCCESS;
}
/***************************************************************************//**
 *
 *  plasma_alloc_ipiv - Allocates an integer workspace of MT*NT*NB entries,
 *  where NB is the value in effect after plasma_tune() has been called for
 *  the routine `func`.
 *
 *******************************************************************************
 *
 * @param[in] M
 *          Row dimension passed to plasma_tune(); also used to derive the
 *          number of tile rows MT (M/NB rounded up).
 *
 * @param[in] N
 *          Column dimension passed to plasma_tune(); also used to derive the
 *          number of tile columns NT (N/NB rounded up).
 *
 * @param[in] func
 *          Identifier of the routine the workspace is meant for.
 *
 * @param[out] memptr
 *          On success, *memptr receives the malloc'ed array of int, or NULL
 *          when the workspace is empty. The caller releases it with free().
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit (including an empty workspace)
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context is available
 *          \retval PLASMA_ERR_UNEXPECTED plasma_tune() failed
 *          \retval PLASMA_ERR_OUT_OF_RESOURCES malloc() failed
 *
 ******************************************************************************/
int plasma_alloc_ipiv(int M, int N, PLASMA_enum func, void **memptr)
{
    size_t size;
    int status;
    int NB, MT, NT;
    /* NOTE(review): PLASMA_NB presumably reads the tuned value through this
     * local variable -- keep its name; confirm in the headers. */
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ipiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ipiv", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT: number of NB-sized tiles needed to cover M and N */
    NB = PLASMA_NB;
    NT = (N%NB==0) ? (N/NB) : ((N/NB)+1);
    MT = (M%NB==0) ? (M/NB) : ((M/NB)+1);

    /* A non-positive tile count means an empty workspace. The test has to be
     * done on the signed values: once converted to size_t a negative count
     * wraps to a huge number, which an unsigned "size <= 0" cannot detect. */
    if (MT <= 0 || NT <= 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }

    size = (size_t)MT*NT * NB * sizeof(int);
    if (size == 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }

    *memptr = malloc(size);
    if (*memptr == NULL) {
        plasma_error("plasma_alloc_ipiv", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    return PLASMA_SUCCESS;
}
/***************************************************************************//** * * @ingroup double * * PLASMA_dtrmm - Computes B = alpha*op( A )*B or B = alpha*B*op( A ). * ******************************************************************************* * * @param[in] side * Specifies whether op( A ) appears on the left or on the right of B: * = PlasmaLeft: B = alpha*op( A )*B * = PlasmaRight: B = alpha*B*op( A ) * * @param[in] uplo * Specifies whether the matrix A is upper triangular or lower triangular: * = PlasmaUpper: Upper triangle of A is stored; * = PlasmaLower: Lower triangle of A is stored. * * @param[in] transA * Specifies whether the matrix A is not transposed or transposed: * = PlasmaNoTrans: A is not transposed; * = PlasmaTrans: A is transposed (for real data this is also the conjugate transpose). * * @param[in] diag * Specifies whether or not A is unit triangular: * = PlasmaNonUnit: A is non unit; * = PlasmaUnit: A is unit. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[in] NRHS * The number of columns of the matrix B. NRHS >= 0. * * @param[in] alpha * alpha specifies the scalar alpha. * * @param[in] A * The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular * part of the array A contains the upper triangular matrix, and the strictly lower * triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N * lower triangular part of the array A contains the lower triangular matrix, and the * strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the * diagonal elements of A are also not referenced and are assumed to be 1. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). * * @param[in,out] B * On entry, the N-by-NRHS matrix B. * On exit, if return value = 0, B is overwritten by the N-by-NRHS product. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_dtrmm_Tile * @sa PLASMA_dtrmm_Tile_Async * @sa PLASMA_ctrmm * @sa PLASMA_dtrmm * @sa PLASMA_strmm * ******************************************************************************/ int PLASMA_dtrmm(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, double alpha, double *A, int LDA, double *B, int LDB) { int NB, NA; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_dtrmm", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (side != PlasmaLeft && side != PlasmaRight) { plasma_error("PLASMA_dtrmm", "illegal value of side"); return -1; } if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_dtrmm", "illegal value of uplo"); return -2; } if (transA != PlasmaTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) { plasma_error("PLASMA_dtrmm", "illegal value of transA"); return -3; } if (diag != PlasmaUnit && diag != PlasmaNonUnit) { plasma_error("PLASMA_dtrmm", "illegal value of diag"); return -4; } if (N < 0) { plasma_error("PLASMA_dtrmm", "illegal value of N"); return -5; } if (NRHS < 0) { plasma_error("PLASMA_dtrmm", "illegal value of NRHS"); return -6; } if (LDA < max(1, N)) { plasma_error("PLASMA_dtrmm", "illegal value of LDA"); return -8; } if (LDB < max(1, N)) { plasma_error("PLASMA_dtrmm", "illegal value of LDB"); return -10; } /* Quick return */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS); if 
(status != PLASMA_SUCCESS) { plasma_error("PLASMA_dtrmm", "plasma_tune() failed"); return status; } /* Set NT & NTRHS */ NB = PLASMA_NB; if (side == PlasmaLeft) { NA = N; } else { NA = NRHS; } plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_dooplap2tile( descA, A, NB, NB, LDA, NA, 0, 0, NA, NA, plasma_desc_mat_free(&(descA)) ); plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zcposv - Computes the solution to a system of linear equations A * X = B, * where A is an N-by-N symmetric positive definite (or Hermitian positive definite * in the complex case) matrix and X and B are N-by-NRHS matrices. * The Cholesky decomposition is used to factor A as * * A = U**H * U, if uplo = PlasmaUpper, or * A = L * L**H, if uplo = PlasmaLower, * * where U is an upper triangular matrix and L is a lower triangular matrix. * The factored form of A is then used to solve the system of equations A * X = B. * * PLASMA_zcposv first attempts to factorize the matrix in COMPLEX and use this * factorization within an iterative refinement procedure to produce a * solution with COMPLEX*16 normwise backward error quality (see below). * If the approach fails the method switches to a COMPLEX*16 * factorization and solve. * * The iterative refinement is not going to be a winning strategy if * the ratio COMPLEX performance over COMPLEX*16 performance is too * small. A reasonable strategy should take the number of right-hand * sides and the size of the matrix into account. This might be done * with a call to ILAENV in the future. Up to now, we always try * iterative refinement. * * The iterative refinement process is stopped if ITER > ITERMAX or * for all the RHS we have: RNRM < N*XNRM*ANRM*EPS*BWDMAX * where: * * - ITER is the number of the current iteration in the iterative refinement process * - RNRM is the infinity-norm of the residual * - XNRM is the infinity-norm of the solution * - ANRM is the infinity-operator-norm of the matrix A * - EPS is the machine epsilon returned by DLAMCH('Epsilon'). * * Actually, in its current state (PLASMA 2.1.0), the test is slightly relaxed. * * The values ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively. 
* ******************************************************************************* * * @param[in] uplo * Specifies whether the matrix A is upper triangular or lower triangular: * = PlasmaUpper: Upper triangle of A is stored; * = PlasmaLower: Lower triangle of A is stored. * * @param[in] N * The number of linear equations, i.e., the order of the matrix A. N >= 0. * * @param[in] NRHS * The number of right hand sides, i.e., the number of columns of the matrix B. * NRHS >= 0. * * @param[in] A * The N-by-N symmetric positive definite (or Hermitian) coefficient matrix A. * If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A * contains the upper triangular part of the matrix A, and the strictly lower triangular * part of A is not referenced. * If uplo = PlasmaLower, the leading N-by-N lower triangular part of A contains the lower * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * This matrix is not modified. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). * * @param[in] B * The N-by-NRHS right hand side matrix B. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). * * @param[out] X * If return value = 0, the N-by-NRHS solution matrix X. * * @param[in] LDX * The leading dimension of the array X. LDX >= max(1,N). * * @param[out] ITER * The number of the current iteration in the iterative refinement process * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * \retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. 
* ******************************************************************************* * * @sa PLASMA_zcposv_Tile * @sa PLASMA_zcposv_Tile_Async * @sa PLASMA_dsposv * @sa PLASMA_zposv * ******************************************************************************/ int PLASMA_zcposv(PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t *X, int LDX, int *ITER) { int NB; int status; PLASMA_desc descA; PLASMA_desc descB; PLASMA_desc descX; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zcposv", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_zcposv", "illegal value of uplo"); return -1; } if (N < 0) { plasma_error("PLASMA_zcposv", "illegal value of N"); return -2; } if (NRHS < 0) { plasma_error("PLASMA_zcposv", "illegal value of NRHS"); return -3; } if (LDA < max(1, N)) { plasma_error("PLASMA_zcposv", "illegal value of LDA"); return -5; } if (LDB < max(1, N)) { plasma_error("PLASMA_zcposv", "illegal value of LDB"); return -7; } if (LDX < max(1, N)) { plasma_error("PLASMA_zcposv", "illegal value of LDX"); return -10; } /* Quick return - currently NOT equivalent to LAPACK's * LAPACK does not have such check for ZCPOSV */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_ZCPOSV, N, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zcposv", "plasma_tune() failed"); return status; } NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); /* DOUBLE PRECISION INITIALIZATION */ if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) ); plasma_zooplap2tile( descB, B, NB, NB, LDB, 
NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) ); plasma_zdesc_alloc( descX, NB, NB, N, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descX)) ); } else {
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zhemm - Performs one of the matrix-matrix operations * * \f[ C = \alpha \times A \times B + \beta \times C \f] * * or * * \f[ C = \alpha \times B \times A + \beta \times C \f] * * where alpha and beta are scalars, A is an hermitian matrix and B and * C are m by n matrices. * ******************************************************************************* * * @param[in] side * Specifies whether the hermitian matrix A appears on the * left or right in the operation as follows: * = PlasmaLeft: \f[ C = \alpha \times A \times B + \beta \times C \f] * = PlasmaRight: \f[ C = \alpha \times B \times A + \beta \times C \f] * * @param[in] uplo * Specifies whether the upper or lower triangular part of * the hermitian matrix A is to be referenced as follows: * = PlasmaLower: Only the lower triangular part of the * hermitian matrix A is to be referenced. * = PlasmaUpper: Only the upper triangular part of the * hermitian matrix A is to be referenced. * * @param[in] M * Specifies the number of rows of the matrix C. M >= 0. * * @param[in] N * Specifies the number of columns of the matrix C. N >= 0. * * @param[in] alpha * Specifies the scalar alpha. * * @param[in] A * A is a LDA-by-ka matrix, where ka is M when side = PlasmaLeft, * and is N otherwise. Only the uplo triangular part is referenced. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,ka). * * @param[in] B * B is a LDB-by-N matrix, where the leading M-by-N part of * the array B must contain the matrix B. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,M). * * @param[in] beta * Specifies the scalar beta. * * @param[in,out] C * C is a LDC-by-N matrix. * On exit, the array is overwritten by the M by N updated matrix. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************* * * @sa PLASMA_zhemm_Tile * @sa PLASMA_chemm * @sa PLASMA_dhemm * @sa PLASMA_shemm * ******************************************************************************/ int PLASMA_zhemm(PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC) { int NB; int Am; int status; PLASMA_desc descA, descB, descC; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zhemm", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if ( (side != PlasmaLeft) && (side != PlasmaRight) ){ plasma_error("PLASMA_zhemm", "illegal value of side"); return -1; } if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) { plasma_error("PLASMA_zhemm", "illegal value of uplo"); return -2; } Am = ( side == PlasmaLeft ) ? 
M : N; if (M < 0) { plasma_error("PLASMA_zhemm", "illegal value of M"); return -3; } if (N < 0) { plasma_error("PLASMA_zhemm", "illegal value of N"); return -4; } if (LDA < max(1, Am)) { plasma_error("PLASMA_zhemm", "illegal value of LDA"); return -7; } if (LDB < max(1, M)) { plasma_error("PLASMA_zhemm", "illegal value of LDB"); return -9; } if (LDC < max(1, M)) { plasma_error("PLASMA_zhemm", "illegal value of LDC"); return -12; } /* Quick return */ if (M == 0 || N == 0 || ((alpha == (PLASMA_Complex64_t)0.0) && beta == (PLASMA_Complex64_t)1.0)) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_ZHEMM, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zhemm", "plasma_tune() failed"); return status; } /* Set MT & NT & KT */ NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am, plasma_desc_mat_free(&(descA)) ); plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC))); } else {
/***************************************************************************//** * * @ingroup double * * PLASMA_dormlq - overwrites the general M-by-N matrix B with Q*B, where Q is an orthogonal * matrix (unitary in the complex case) defined as the product of elementary reflectors returned * by PLASMA_dgelqf. Q is of order M. * ******************************************************************************* * * @param[in] side * Intended usage: * = PlasmaLeft: apply Q or Q**T from the left; * = PlasmaRight: apply Q or Q**T from the right. * Currently only PlasmaLeft is supported. * * @param[in] trans * Intended usage: * = PlasmaNoTrans: no transpose, apply Q; * = PlasmaTrans: transpose, apply Q**T. * Currently only PlasmaTrans is supported. * * @param[in] M * The number of rows of the matrix B. M >= 0. * * @param[in] N * The number of columns of the matrix B. N >= 0. * * @param[in] K * The number of rows of elementary tile reflectors whose product defines the matrix Q. * M >= K >= 0. * * @param[in] A * Details of the LQ factorization of the original matrix A as returned by PLASMA_dgelqf. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,K). * * @param[in] T * Auxiliary factorization data, computed by PLASMA_dgelqf. * * @param[in,out] B * On entry, the M-by-N matrix B. * On exit, B is overwritten by Q*B or Q**T*B. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,M). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_dormlq_Tile * @sa PLASMA_dormlq_Tile_Async * @sa PLASMA_cunmlq * @sa PLASMA_dormlq * @sa PLASMA_sormlq * @sa PLASMA_dgelqf * ******************************************************************************/ int PLASMA_dormlq(PLASMA_enum side, PLASMA_enum trans, int M, int N, int K, double *A, int LDA, double *T, double *B, int LDB) { int NB, IB, IBNB, KT, NT, An; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB, descT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_dormlq", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (side == PlasmaLeft) An = M; else An = N; /* Check input arguments */ if ( (side != PlasmaLeft) && (side != PlasmaRight) ) { plasma_error("PLASMA_dormlq", "illegal value of side"); return -1; } if ( (trans != PlasmaTrans) && (trans != PlasmaNoTrans) ){ plasma_error("PLASMA_dormlq", "illegal value of trans"); return -2; } if (M < 0) { plasma_error("PLASMA_dormlq", "illegal value of M"); return -3; } if (N < 0) { plasma_error("PLASMA_dormlq", "illegal value of N"); return -4; } if ((K < 0) || (K > An)) { plasma_error("PLASMA_dormlq", "illegal value of K"); return -5; } if (LDA < max(1, K)) { plasma_error("PLASMA_dormlq", "illegal value of LDA"); return -7; } if (LDB < max(1, M)) { plasma_error("PLASMA_dormlq", "illegal value of LDB"); return -10; } /* Quick return - currently NOT equivalent to LAPACK's: * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */ if (min(M, min(N, K)) == 0) return PLASMA_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_DGELS, M, K, 
N); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_dormlq", "plasma_tune() failed"); return status; } /* Set MT, NT & NTRHS */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; KT = ( K%NB==0) ? (K /NB) : (K /NB+1); NT = (An%NB==0) ? (An/NB) : (An/NB+1); plasma_sequence_create(plasma, &sequence); if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { descT = plasma_desc_init( PlasmaRealDouble, IB, NB, IBNB, KT*IB, NT*NB, 0, 0, KT*IB, NT*NB); } else { /* Double the size of T to accomodate the tree reduction phase */ descT = plasma_desc_init( PlasmaRealDouble, IB, NB, IBNB, KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB); } descT.mat = T; if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_dooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An, plasma_desc_mat_free(&(descA)) ); plasma_dooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup PLASMA_Complex32_t * * PLASMA_csyrk - Performs one of the symmetric rank k operations * * \f[ C = \alpha [ op( A ) \times op( A )' ] + \beta C \f], * * where op( X ) is one of * * op( X ) = X or op( X ) = X' * * where alpha and beta are scalars, C is an n-by-n symmetric * matrix and A is an n-by-k matrix in the first case and a k-by-n * matrix in the second case. * ******************************************************************************* * * @param[in] uplo * = PlasmaUpper: Upper triangle of C is stored; * = PlasmaLower: Lower triangle of C is stored. * * @param[in] trans * Specifies whether the matrix A is transposed or not transposed: * = PlasmaNoTrans: A is not transposed; * = PlasmaTrans : A is transposed. * * @param[in] N * N specifies the order of the matrix C. N must be at least zero. * * @param[in] K * K specifies the number of columns of the matrix op( A ). * * @param[in] alpha * alpha specifies the scalar alpha. * * @param[in] A * A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans, * and is N otherwise. * * @param[in] LDA * The leading dimension of the array A. If trans = PlasmaNoTrans, LDA must be at least * max( 1, N ), otherwise LDA must be at least max( 1, K ). * * @param[in] beta * beta specifies the scalar beta * * @param[in,out] C * C is a LDC-by-N matrix. * On exit, the array uplo part of the matrix is overwritten * by the uplo part of the updated matrix. * * @param[in] LDC * The leading dimension of the array C. LDC >= max( 1, N ). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************* * * @sa PLASMA_csyrk_Tile * @sa PLASMA_csyrk * @sa PLASMA_dsyrk * @sa PLASMA_ssyrk * ******************************************************************************/ int PLASMA_csyrk(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int LDC) { int NB; int Am, An; int status; PLASMA_desc descA, descC; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_csyrk", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) { plasma_error("PLASMA_csyrk", "illegal value of uplo"); return -1; } if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) { plasma_error("PLASMA_csyrk", "illegal value of trans"); return -2; } if ( trans == PlasmaNoTrans ) { Am = N; An = K; } else { Am = K; An = N; } if (N < 0) { plasma_error("PLASMA_csyrk", "illegal value of N"); return -3; } if (K < 0) { plasma_error("PLASMA_csyrk", "illegal value of K"); return -4; } if (LDA < max(1, Am)) { plasma_error("PLASMA_csyrk", "illegal value of LDA"); return -7; } if (LDC < max(1, N)) { plasma_error("PLASMA_csyrk", "illegal value of LDC"); return -10; } /* Quick return */ if (N == 0 || ((alpha == (PLASMA_Complex32_t)0.0 || K == 0.0) && beta == (PLASMA_Complex32_t)1.0)) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_CSYRK, N, K, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_csyrk", "plasma_tune() failed"); return status; } /* Set MT & NT & KT */ NB = PLASMA_NB; plasma_sequence_create(plasma, 
&sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) ); plasma_cooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC))); } else {
/***************************************************************************//** * * @ingroup double * * PLASMA_dgels - solves overdetermined or underdetermined linear systems involving an M-by-N * matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. * The following options are provided: * * # trans = PlasmaNoTrans and M >= N: find the least squares solution of an overdetermined * system, i.e., solve the least squares problem: minimize || B - A*X ||. * * # trans = PlasmaNoTrans and M < N: find the minimum norm solution of an underdetermined * system A * X = B. * * Several right hand side vectors B and solution vectors X can be handled in a single call; * they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS * solution matrix X. * ******************************************************************************* * * @param[in] trans * Intended usage: * = PlasmaNoTrans: the linear system involves A; * = PlasmaTrans: the linear system involves A**T. * Currently only PlasmaNoTrans is supported. * * @param[in] M * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= 0. * * @param[in] NRHS * The number of right hand sides, i.e., the number of columns of the matrices B and X. * NRHS >= 0. * * @param[in,out] A * On entry, the M-by-N matrix A. * On exit, * if M >= N, A is overwritten by details of its QR factorization as returned by * PLASMA_dgeqrf; * if M < N, A is overwritten by details of its LQ factorization as returned by * PLASMA_dgelqf. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[out] T * On exit, auxiliary factorization data. 
* * @param[in,out] B * On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; * On exit, if return value = 0, B is overwritten by the solution vectors, stored * columnwise: * if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual * sum of squares for the solution in each column is given by the sum of squares of the * modulus of elements N+1 to M in that column; * if M < N, rows 1 to N of B contain the minimum norm solution vectors; * * @param[in] LDB * The leading dimension of the array B. LDB >= MAX(1,M,N). * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_dgels_Tile * @sa PLASMA_dgels_Tile_Async * @sa PLASMA_cgels * @sa PLASMA_dgels * @sa PLASMA_sgels * ******************************************************************************/ int PLASMA_dgels(PLASMA_enum trans, int M, int N, int NRHS, double *A, int LDA, double *T, double *B, int LDB) { int i, j; int NB, IB, IBNB, MT, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB, descT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_dgels", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (trans != PlasmaNoTrans) { plasma_error("PLASMA_dgels", "only PlasmaNoTrans supported"); return PLASMA_ERR_NOT_SUPPORTED; } if (M < 0) { plasma_error("PLASMA_dgels", "illegal value of M"); return -2; } if (N < 0) { plasma_error("PLASMA_dgels", "illegal value of N"); return -3; } if (NRHS < 0) { plasma_error("PLASMA_dgels", "illegal value of NRHS"); return -4; } if (LDA < max(1, M)) { plasma_error("PLASMA_dgels", "illegal value of LDA"); return -6; } if (LDB < 
max(1, max(M, N))) { plasma_error("PLASMA_dgels", "illegal value of LDB"); return -9; } /* Quick return */ if (min(M, min(N, NRHS)) == 0) { for (i = 0; i < max(M, N); i++) for (j = 0; j < NRHS; j++) B[j*LDB+i] = 0.0; return PLASMA_SUCCESS; } /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_DGELS, M, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_dgels", "plasma_tune() failed"); return status; } /* Set MT, NT & NTRHS */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (N%NB==0) ? (N/NB) : (N/NB+1); MT = (M%NB==0) ? (M/NB) : (M/NB+1); plasma_sequence_create(plasma, &sequence); if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { descT = plasma_desc_init( PlasmaRealDouble, IB, NB, IBNB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); } else { /* Double the size of T to accomodate the tree reduction phase */ descT = plasma_desc_init( PlasmaRealDouble, IB, NB, IBNB, MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB); } descT.mat = T; if ( M >= N ) { if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) ); plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zunglq - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the * first M rows of a product of the elementary reflectors returned by PLASMA_zgelqf. * ******************************************************************************* * * @param[in] M * The number of rows of the matrix Q. M >= 0. * * @param[in] N * The number of columns of the matrix Q. N >= M. * * @param[in] K * The number of rows of elementary tile reflectors whose product defines the matrix Q. * M >= K >= 0. * * @param[in] A * Details of the LQ factorization of the original matrix A as returned by PLASMA_zgelqf. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[in] T * Auxiliary factorization data, computed by PLASMA_zgelqf. * * @param[out] Q * On exit, the M-by-N matrix Q. * * @param[in] LDQ * The leading dimension of the array Q. LDQ >= max(1,M). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval PLASMA_SUCCESS <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zunglq_Tile * @sa PLASMA_zunglq_Tile_Async * @sa PLASMA_cunglq * @sa PLASMA_dorglq * @sa PLASMA_sorglq * @sa PLASMA_zgelqf * ******************************************************************************/ int PLASMA_zunglq(int M, int N, int K, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T, PLASMA_Complex64_t *Q, int LDQ) { int NB, IB, IBNB, KT, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descQ, descT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zunglq", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (M < 0) { plasma_error("PLASMA_zunglq", "illegal value of M"); return -1; } if (N < M) { plasma_error("PLASMA_zunglq", "illegal value of N"); return -2; } if (K < 0 || K > M) { plasma_error("PLASMA_zunglq", "illegal value of K"); return -3; } if (LDA < max(1, M)) { plasma_error("PLASMA_zunglq", "illegal value of LDA"); return -5; } if (LDQ < max(1, M)) { plasma_error("PLASMA_zunglq", "illegal value of LDQ"); return -8; } /* Quick return - currently NOT equivalent to LAPACK's: * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */ if (min(M, min(N, K)) == 0) return PLASMA_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zunglq", "plasma_tune() failed"); return status; } /* Set MT & NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (N%NB==0) ? (N/NB) : (N/NB+1); KT = (K%NB==0) ? 
(K/NB) : (K/NB+1); plasma_sequence_create(plasma, &sequence); if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { descT = plasma_desc_init( PlasmaComplexDouble, IB, NB, IBNB, KT*IB, NT*NB, 0, 0, KT*IB, NT*NB); } else { /* Double the size of T to accomodate the tree reduction phase */ descT = plasma_desc_init( PlasmaComplexDouble, IB, NB, IBNB, KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB); } descT.mat = T; if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, plasma_desc_mat_free(&(descA)) ); plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ))); } else {
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zlansy - LAPACK-layout driver returning one of four norms of A:
 *
 *     zlansy = ( max(abs(A(i,j))), NORM = PlasmaMaxNorm
 *              (
 *              ( norm1(A),         NORM = PlasmaOneNorm
 *              (
 *              ( normI(A),         NORM = PlasmaInfNorm
 *              (
 *              ( normF(A),         NORM = PlasmaFrobeniusNorm
 *
 *  norm1 is the one norm (maximum column sum), normI the infinity norm
 *  (maximum row sum) and normF the Frobenius norm (square root of the sum of
 *  squares). max(abs(A(i,j))) is not a consistent matrix norm.
 *
 *  The routine validates its arguments, builds a tile descriptor for A and
 *  delegates the computation to PLASMA_zlansy_Tile_Async.
 *
 *******************************************************************************
 *
 * @param[in] norm
 *          = PlasmaMaxNorm: Max norm
 *          = PlasmaOneNorm: One norm
 *          = PlasmaInfNorm: Infinity norm
 *          = PlasmaFrobeniusNorm: Frobenius norm
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The number of columns/rows of the matrix A. N >= 0. When N = 0,
 *          the returned value is zero.
 *
 * @param[in] A
 *          The N-by-N matrix A.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval the norm described above.
 *          \retval <0 an error code converted to double: -i when the i-th
 *                     argument is illegal, PLASMA_ERR_NOT_INITIALIZED when no
 *                     context exists, or the status reported by plasma_tune().
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlansy_Tile
 * @sa PLASMA_zlansy_Tile_Async
 * @sa PLASMA_clansy
 * @sa PLASMA_dlansy
 * @sa PLASMA_slansy
 *
 ******************************************************************************/
double PLASMA_zlansy(PLASMA_enum norm, PLASMA_enum uplo, int N,
                     PLASMA_Complex64_t *A, int LDA)
{
    PLASMA_desc descA;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma = plasma_context_self();
    double value;
    int status;
    int NB;
    int outofplace;

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlansy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation: the returned code is minus the argument position */
    if ( !( (norm == PlasmaMaxNorm) || (norm == PlasmaOneNorm) ||
            (norm == PlasmaInfNorm) || (norm == PlasmaFrobeniusNorm) ) ) {
        plasma_error("PLASMA_zlansy", "illegal value of norm");
        return -1;
    }
    if ( !( (uplo == PlasmaUpper) || (uplo == PlasmaLower) ) ) {
        plasma_error("PLASMA_zlansy", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlansy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlansy", "illegal value of LDA");
        return -5;
    }

    /* An empty matrix has norm zero */
    if (N == 0) {
        return 0.0;
    }

    /* Select the tile size using the ZGEMM tuning entry */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlansy", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* The translation mode is read once and used for both conversions */
    outofplace = ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE );

    /* Build the tile descriptor descA from the LAPACK array A */
    if ( outofplace ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request,
                             plasma_desc_mat_free(&(descA)) );
    }
    else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request);
    }

    /* Call the tile interface; the result is written to value */
    PLASMA_zlansy_Tile_Async(norm, uplo, &descA, &value, sequence, &request);

    if ( outofplace ) {
        /* A is input only: nothing to copy back, just release the tile copy */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    }
    else {
        /* Convert A back from the in-place tile layout */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);
        plasma_dynamic_sync();
    }

    plasma_sequence_destroy(plasma, sequence);
    return value;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t
 *
 *  PLASMA_cpotrf - LAPACK-layout driver for the Cholesky factorization of a
 *  symmetric positive definite (or Hermitian positive definite in the complex
 *  case) matrix A. The factorization has the form
 *
 *    \f[ A = \{_{L\times L^H, if uplo = PlasmaLower}^{U^H\times U, if uplo = PlasmaUpper} \f]
 *
 *  where U is an upper triangular matrix and L is a lower triangular matrix.
 *  The routine validates its arguments, builds a tile descriptor for A and
 *  delegates the factorization to PLASMA_cpotrf_Tile_Async.
 *
 *******************************************************************************
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the symmetric positive definite (or Hermitian) matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A
 *          contains the upper triangular part of the matrix A, and the strictly
 *          lower triangular part of A is not referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular part of A
 *          contains the lower triangular part of the matrix A, and the strictly
 *          upper triangular part of A is not referenced.
 *          On exit, if return value = 0, the factor U or L from the Cholesky
 *          factorization A = U**H*U or A = L*L**H.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *          \retval >0 if i, the leading minor of order i of A is not positive
 *                     definite, so the factorization could not be completed,
 *                     and the solution has not been computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_cpotrf_Tile
 * @sa PLASMA_cpotrf_Tile_Async
 * @sa PLASMA_cpotrf
 * @sa PLASMA_dpotrf
 * @sa PLASMA_spotrf
 * @sa PLASMA_cpotrs
 *
 ******************************************************************************/
int PLASMA_cpotrf(PLASMA_enum uplo, int N, PLASMA_Complex32_t *A, int LDA)
{
    PLASMA_desc descA;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma = plasma_context_self();
    int status;
    int NB;
    int outofplace;

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation: the returned code is minus the argument position */
    if ( !(uplo == PlasmaUpper || uplo == PlasmaLower) ) {
        plasma_error("PLASMA_cpotrf", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cpotrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cpotrf", "illegal value of LDA");
        return -4;
    }

    /* Nothing to factor for an empty matrix (N is known to be >= 0 here) */
    if (N == 0) {
        return PLASMA_SUCCESS;
    }

    /* Select the tile size using the CPOSV tuning entry */
    status = plasma_tune(PLASMA_FUNC_CPOSV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cpotrf", "plasma_tune() failed");
        return status;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* The translation mode is read once and used for both conversions */
    outofplace = ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE );

    /* Build the tile descriptor descA from the LAPACK array A */
    if ( outofplace ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
    }
    else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_cpotrf_Tile_Async(uplo, &descA, sequence, &request);

    /* Bring the result back to LAPACK layout and wait for completion */
    if ( outofplace ) {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    }
    else {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    }

    /* The sequence status must be read before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
/***************************************************************************//** * * @ingroup float * * PLASMA_sgelqf - Computes the tile LQ factorization of a complex M-by-N matrix A: A = L * Q. * ******************************************************************************* * * @param[in] M * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= 0. * * @param[in,out] A * On entry, the M-by-N matrix A. * On exit, the elements on and below the diagonal of the array contain the m-by-min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); the elements above the * diagonal represent the unitary matrix Q as a product of elementary reflectors, stored * by tiles. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[out] T * On exit, auxiliary factorization data, required by PLASMA_sgelqs to solve the system * of equations. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_sgelqf_Tile * @sa PLASMA_sgelqf_Tile_Async * @sa PLASMA_cgelqf * @sa PLASMA_dgelqf * @sa PLASMA_sgelqf * @sa PLASMA_sgelqs * ******************************************************************************/ int PLASMA_sgelqf(int M, int N, float *A, int LDA, float *T) { int NB, IB, IBNB, MT, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_sgelqf", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (M < 0) { plasma_error("PLASMA_sgelqf", "illegal value of M"); return -1; } if (N < 0) { plasma_error("PLASMA_sgelqf", "illegal value of N"); 
return -2; } if (LDA < max(1, M)) { plasma_error("PLASMA_sgelqf", "illegal value of LDA"); return -4; } /* Quick return */ if (min(M, N) == 0) return PLASMA_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_SGELS, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_sgelqf", "plasma_tune() failed"); return status; } /* Set MT & NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); plasma_sequence_create(plasma, &sequence); if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { descT = plasma_desc_init( PlasmaRealFloat, IB, NB, IBNB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); } else { /* Double the size of T to accomodate the tree reduction phase */ descT = plasma_desc_init( PlasmaRealFloat, IB, NB, IBNB, MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB); } descT.mat = T; if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) ); } else { plasma_siplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N); } /* Call the tile interface */ PLASMA_sgelqf_Tile_Async(&descA, &descT, sequence, &request); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooptile2lap( descA, A, NB, NB, LDA, N ); plasma_dynamic_sync(); plasma_desc_mat_free(&descA); } else { plasma_siptile2lap( descA, A, NB, NB, LDA, N ); plasma_dynamic_sync(); } status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zgetri - Computes the inverse of a matrix using the LU factorization
 *  computed by PLASMA_zgetrf.
 *  This method inverts U and then computes inv(A) by solving the system
 *  inv(A)*L = inv(U) for inv(A).
 *
 *  The routine validates its arguments, builds a tile descriptor for A,
 *  allocates a workspace descriptor and delegates the computation to
 *  PLASMA_zgetri_Tile_Async.
 *
 *******************************************************************************
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the triangular factor L or U from the
 *          factorization A = P*L*U as computed by PLASMA_zgetrf.
 *          On exit, if return value = 0, the inverse of the original
 *          matrix A.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in] IPIV
 *          The pivot indices that define the permutations
 *          as returned by PLASMA_zgetrf.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value; other
 *                     negative values are PLASMA error codes (context not
 *                     initialized, tuning failure, workspace allocation failure)
 *          \retval >0 if i, the (i,i) element of the factor U is
 *                     exactly zero; The matrix is singular
 *                     and its inverse could not be computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri_Tile
 * @sa PLASMA_zgetri_Tile_Async
 * @sa PLASMA_cgetri
 * @sa PLASMA_dgetri
 * @sa PLASMA_sgetri
 * @sa PLASMA_zgetrf
 *
 ******************************************************************************/
int PLASMA_zgetri(int N, PLASMA_Complex64_t *A, int LDA, int *IPIV)
{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    PLASMA_desc descW;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zgetri", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgetri", "illegal value of LDA");
        return -3;
    }

    /* Quick return */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;

    /* Select the tile size using the ZGESV tuning entry */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri", "plasma_tune() failed");
        return status;
    }

    /* Set NB */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Build the tile descriptor descA from the LAPACK array A */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request,
                             plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request);
    }

    /* Allocate workspace.
     * NOTE(review): assumes the allocator returns a PLASMA status code
     * (PLASMA_SUCCESS on success), like the other allocation helpers in this
     * library - confirm against its declaration. */
    status = PLASMA_Alloc_Workspace_zgetri_Tile_Async(&descA, &descW);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri", "workspace allocation failed");
        /* Undo the layout translation before bailing out so that the caller's
         * A is left in LAPACK layout and nothing allocated here is leaked. */
        if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
            plasma_dynamic_sync();
            plasma_desc_mat_free(&descA);
        } else {
            plasma_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);
            plasma_dynamic_sync();
        }
        plasma_sequence_destroy(plasma, sequence);
        return status;
    }

    /* Call the tile interface */
    PLASMA_zgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);

    /* Bring the result back to LAPACK layout and wait for completion */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);
        plasma_dynamic_sync();
    }
    plasma_desc_mat_free(&(descW));

    /* The sequence status must be read before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
/***************************************************************************//** * * @ingroup float * * PLASMA_sgemm - Performs one of the matrix-matrix operations * * \f[ C = \alpha [op( A )\times op( B )] + \beta C \f], * * where op( X ) is one of * * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ) * * alpha and beta are scalars, and A, B and C are matrices, with op( A ) * an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. * ******************************************************************************* * * @param[in] transA * Specifies whether the matrix A is transposed, not transposed or conjugate transposed: * = PlasmaNoTrans: A is not transposed; * = PlasmaTrans: A is transposed; * = PlasmaTrans: A is conjugate transposed (same as transposed for real data). * * @param[in] transB * Specifies whether the matrix B is transposed, not transposed or conjugate transposed: * = PlasmaNoTrans: B is not transposed; * = PlasmaTrans: B is transposed; * = PlasmaTrans: B is conjugate transposed (same as transposed for real data). * * @param[in] M * M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0. * * @param[in] N * N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0. * * @param[in] K * K specifies the number of columns of the matrix op( A ) and the number of rows of * the matrix op( B ). K >= 0. * * @param[in] alpha * alpha specifies the scalar alpha * * @param[in] A * A is a LDA-by-ka matrix, where ka is K when transA = PlasmaNoTrans, * and is M otherwise. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[in] B * B is a LDB-by-kb matrix, where kb is N when transB = PlasmaNoTrans, * and is K otherwise. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). * * @param[in] beta * beta specifies the scalar beta * * @param[in,out] C * C is a LDC-by-N matrix. * On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C ) * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************* * * @sa PLASMA_sgemm_Tile * @sa PLASMA_cgemm * @sa PLASMA_dgemm * @sa PLASMA_sgemm * ******************************************************************************/ int PLASMA_sgemm(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, float alpha, float *A, int LDA, float *B, int LDB, float beta, float *C, int LDC) { int NB; int Am, An, Bm, Bn; int status; PLASMA_desc descA, descB, descC; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_sgemm", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaTrans)) { plasma_error("PLASMA_sgemm", "illegal value of transA"); return -1; } if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaTrans)) { plasma_error("PLASMA_sgemm", "illegal value of transB"); return -2; } if ( transA == PlasmaNoTrans ) { Am = M; An = K; } else { Am = K; An = M; } if ( transB == PlasmaNoTrans ) { Bm = K; Bn = N; } else { Bm = N; Bn = K; } if (M < 0) { plasma_error("PLASMA_sgemm", "illegal value of M"); return -3; } if (N < 0) { plasma_error("PLASMA_sgemm", "illegal value of N"); return -4; } if (K < 0) { plasma_error("PLASMA_sgemm", "illegal value of N"); return -5; } if (LDA < max(1, Am)) { plasma_error("PLASMA_sgemm", "illegal value of LDA"); return -8; } if (LDB < max(1, Bm)) { plasma_error("PLASMA_sgemm", "illegal value of LDB"); return -10; } if (LDC < max(1, M)) { plasma_error("PLASMA_sgemm", "illegal value of LDC"); return -13; } /* Quick return */ if (M == 0 || N == 0 || ((alpha == (float)0.0 || K == 0) && beta == (float)1.0)) return 
PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_SGEMM, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_sgemm", "plasma_tune() failed"); return status; } /* Set MT & NT & KT */ NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) ); plasma_sooplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); plasma_sooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC))); } else {
/***************************************************************************//** * * @ingroup float * * PLASMA_ssygv - Computes all eigenvalues and, optionally, * eigenvectors of a complex generalized Hermitian-definite * eigenproblem of the form: A*x=(lambda)*B*x, A*Bx=(lambda)*x, or * B*A*x=(lambda)*x. * Here A and B are assumed to be Hermitian and B is also positive * definite. * Note: Only PlasmaNoVec supported! * ******************************************************************************* * * @param[in] PlasmaItype * Intended usage: * = 1: A*x=(lambda)*B*x * = 2: A*Bx=(lambda)*x * = 3: B*A*x=(lambda)*x * * @param[in] jobz * Intended usage: * = PlasmaNoVec: computes eigenvalues only; * = PlasmaVec: computes eigenvalues and eigenvectors. * Note: Only PlasmaNoVec supported! * * @param[in] uplo * Specifies whether the matrix A is upper triangular or * lower triangular: * = PlasmaUpper: Upper triangle of A and B are stored; * = PlasmaLower: Lower triangle of A and B are stored. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[in,out] A * On entry, the symmetric (or Hermitian) matrix A. * If uplo = PlasmaUpper, the leading N-by-N upper triangular * part of A contains the upper triangular part of the matrix * A, and the strictly lower triangular part of A is not * referenced. * If uplo = PlasmaLower, the leading N-by-N lower triangular * part of A contains the lower triangular part of the matrix * A, and the strictly upper triangular part of A is not * referenced. * On exit, if jobz = PlasmaVec, then if return value = 0, A * contains the matrix Z of eigenvectors. * The eigenvectors are normalized as follows: * if ITYPE = 1 or 2, Z**T*B*Z = I; * if ITYPE = 3, Z**T*inv(B)*Z = I. * If jobz = PlasmaNoVec, then on exit the lower triangle (if * uplo = PlasmaLower) or the upper triangle (if uplo = * PlasmaUpper) of A, including the diagonal, is destroyed. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). 
* * @param[in,out] B * On entry, the symmetric (or Hermitian) positive definite * matrix B. * If uplo = PlasmaUpper, the leading N-by-N upper triangular * part of B contains the upper triangular part of the matrix * B, and the strictly lower triangular part of B is not * referenced. * If uplo = PlasmaLower, the leading N-by-N lower triangular * part of B contains the lower triangular part of the matrix * B, and the strictly upper triangular part of B is not * referenced. * On exit, if return value <= N, the part of B containing * the matrix is overwritten by the triangular factor U or L * from the Cholesky factorization B = U**T*U or B = L*L**T. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). * * @param[out] W * On exit, if info = 0, the eigenvalues. * * @param[in, out] descT * On entry, descriptor as returned by PLASMA_Alloc_Workspace_ssygv * On exit, contains auxiliary factorization data. * * @param[out] Q * On exit, if jobz = PlasmaVec and info = 0, the eigenvectors. * * @param[in] LDQ * The leading dimension of Q. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * \retval <=N if INFO = i, plasma_ssygv failed to converge; i * off-diagonal elements of an intermediate tridiagonal * form did not converge to zero. * \retval >N if INFO = N + i, for 1 <= i <= N, then the leading * minor of order i of B is not positive definite. * The factorization of B could not be completed and * no eigenvalues or eigenvectors were computed. 
* ******************************************************************************* * * @sa PLASMA_ssygv_Tile * @sa PLASMA_ssygv_Tile_Async * @sa PLASMA_chegv * @sa PLASMA_dsygv * @sa PLASMA_ssygv * ******************************************************************************/ int PLASMA_ssygv(PLASMA_enum itype, PLASMA_enum jobz, PLASMA_enum uplo, int N, float *A, int LDA, float *B, int LDB, float *W, PLASMA_desc *descT, float *Q, int LDQ) { int NB, IB, IBNB, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB, descQ; plasma = plasma_context_self(); if (plasma == NULL) { plasma_error("PLASMA_ssygv", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on N; Set NBNB */ status = plasma_tune(PLASMA_FUNC_SSYGV, N, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_ssygv", "plasma_tune() failed"); return status; } /* Set NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (N%NB==0) ? 
(N/NB) : (N/NB+1); /* Check input arguments */ if (itype != 1 && itype != 2 && itype != 3) { plasma_error("PLASMA_ssygv", "Illegal value of itype"); return -1; } if (jobz != PlasmaNoVec && jobz != PlasmaVec) { plasma_error("PLASMA_ssygv", "illegal value of jobz"); return -2; } if (uplo != PlasmaLower && uplo!= PlasmaUpper) { plasma_error("PLASMA_ssygv", "only PlasmaLower supported"); return -3; } if (N < 0) { plasma_error("PLASMA_ssygv", "illegal value of N"); return -4; } if (LDA < max(1, N)) { plasma_error("PLASMA_ssygv", "illegal value of LDA"); return -6; } if (LDB < max(1, N)) { plasma_error("PLASMA_ssygv", "illegal value of LDB"); return -8; } if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || ( descT->m != NT*IB ) || (descT->n != NT*NB) ) { plasma_error("PLASMA_ssygv", "invalid T descriptor"); return -10; } if (LDQ < max(1, N)) { plasma_error("PLASMA_ssygv", "illegal value of LDQ"); return -12; } /* Quick return */ if (N == 0) return PLASMA_SUCCESS; if (jobz == PlasmaVec) { plasma_error("PLASMA_ssygv", "computing the eigenvectors is not supported in this version"); return -1; } plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) ); plasma_sooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) ); if (jobz == PlasmaVec) { /* No need for conversion, it's just output */ plasma_sdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descQ)) ); }
/***************************************************************************//** * * @ingroup PLASMA_Complex32_t * * PLASMA_cgelqs - Compute a minimum-norm solution min || A*X - B || using the LQ factorization * A = L*Q computed by PLASMA_cgelqf. * ******************************************************************************* * * @param[in] M * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= M >= 0. * * @param[in] NRHS * The number of columns of B. NRHS >= 0. * * @param[in] A * Details of the LQ factorization of the original matrix A as returned by PLASMA_cgelqf. * * @param[in] LDA * The leading dimension of the array A. LDA >= M. * * @param[in] T * Auxiliary factorization data, computed by PLASMA_cgelqf. * * @param[in,out] B * On entry, the M-by-NRHS right hand side matrix B. * On exit, the N-by-NRHS solution matrix X. * * @param[in] LDB * The leading dimension of the array B. LDB >= N. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_cgelqs_Tile * @sa PLASMA_cgelqs_Tile_Async * @sa PLASMA_cgelqs * @sa PLASMA_dgelqs * @sa PLASMA_sgelqs * @sa PLASMA_cgelqf * ******************************************************************************/ int PLASMA_cgelqs(int M, int N, int NRHS, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *T, PLASMA_Complex32_t *B, int LDB) { int NB, IB, IBNB, MT, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB, descT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_cgelqs", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (M < 0) { plasma_error("PLASMA_cgelqs", 
"illegal value of M"); return -1; } if (N < 0 || M > N) { plasma_error("PLASMA_cgelqs", "illegal value of N"); return -2; } if (NRHS < 0) { plasma_error("PLASMA_cgelqs", "illegal value of N"); return -3; } if (LDA < max(1, M)) { plasma_error("PLASMA_cgelqs", "illegal value of LDA"); return -5; } if (LDB < max(1, max(1, N))) { plasma_error("PLASMA_cgelqs", "illegal value of LDB"); return -8; } /* Quick return */ if (min(M, min(N, NRHS)) == 0) { return PLASMA_SUCCESS; } /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_CGELS, M, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_cgelqs", "plasma_tune() failed"); return status; } /* Set MT, NT & NTRHS */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); plasma_sequence_create(plasma, &sequence); if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { descT = plasma_desc_init( PlasmaComplexFloat, IB, NB, IBNB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); } else { /* Double the size of T to accomodate the tree reduction phase */ descT = plasma_desc_init( PlasmaComplexFloat, IB, NB, IBNB, MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB); } descT.mat = T; if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) ); plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zgetrs_incpiv - Solves a system of linear equations A * X = B, with a general N-by-N matrix A * using the tile LU factorization computed by PLASMA_zgetrf_incpiv. * ******************************************************************************* * * @param[in] trans * Intended to specify the form of the system of equations: * = PlasmaNoTrans: A * X = B (No transpose) * = PlasmaTrans: A**T * X = B (Transpose) * = PlasmaConjTrans: A**H * X = B (Conjugate transpose) * Currently only PlasmaNoTrans is supported. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[in] NRHS * The number of right hand sides, i.e., the number of columns of the matrix B. * NRHS >= 0. * * @param[in] A * The tile factors L and U from the factorization, computed by PLASMA_zgetrf_incpiv. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). * * @param[in] L * Auxiliary factorization data, related to the tile L factor, computed by PLASMA_zgetrf_incpiv. * * @param[in] IPIV * The pivot indices from PLASMA_zgetrf_incpiv (not equivalent to LAPACK). * * @param[in,out] B * On entry, the N-by-NRHS right hand side matrix B. * On exit, the solution matrix X. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \return <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zgetrs_incpiv_Tile * @sa PLASMA_zgetrs_incpiv_Tile_Async * @sa PLASMA_cgetrs_incpiv * @sa PLASMA_dgetrs_incpiv * @sa PLASMA_sgetrs_incpiv * @sa PLASMA_zgetrf_incpiv * ******************************************************************************/ int PLASMA_zgetrs_incpiv(PLASMA_enum trans, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *L, int *IPIV, PLASMA_Complex64_t *B, int LDB) { int NB, IB, IBNB, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB, descL; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgetrs_incpiv", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (trans != PlasmaNoTrans) { plasma_error("PLASMA_zgetrs_incpiv", "only PlasmaNoTrans supported"); return PLASMA_ERR_NOT_SUPPORTED; } if (N < 0) { plasma_error("PLASMA_zgetrs_incpiv", "illegal value of N"); return -2; } if (NRHS < 0) { plasma_error("PLASMA_zgetrs_incpiv", "illegal value of NRHS"); return -3; } if (LDA < max(1, N)) { plasma_error("PLASMA_zgetrs_incpiv", "illegal value of LDA"); return -5; } if (LDB < max(1, N)) { plasma_error("PLASMA_zgetrs_incpiv", "illegal value of LDB"); return -9; } /* Quick return */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zgetrs_incpiv", "plasma_tune() failed"); return status; } /* Set NT & NTRHS */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (N%NB==0) ? 
(N/NB) : (N/NB+1); plasma_sequence_create(plasma, &sequence); descL = plasma_desc_init( PlasmaComplexDouble, IB, NB, IBNB, NT*IB, NT*NB, 0, 0, NT*IB, NT*NB); descL.mat = L; if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) ); plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//**
 *
 * @ingroup float
 *
 *  PLASMA_ssytrd - reduces a real symmetric matrix A to symmetric
 *  tridiagonal form S using a two-stage approach
 *  First stage: reduction to band tridiagonal form (orthogonal Q1);
 *  Second stage: reduction from band to tridiagonal form (orthogonal
 *  Q2).  Let Q = Q1 * Q2 be the global orthogonal transformation;
 *  Q**T * A * Q = S.
 *  Not LAPACK compliant as A does not contain the T elements
 *  Note: Only PlasmaNoVec supported!
 *
 *******************************************************************************
 *
 * @param[in] jobz
 *          Intended usage:
 *          = PlasmaNoVec: computes eigenvalues only;
 *          = PlasmaVec: computes eigenvalues and eigenvectors.
 *          Note: Only PlasmaNoVec supported!
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or
 *          lower triangular:
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the symmetric matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular
 *          part of A contains the upper triangular part of the matrix
 *          A, and the strictly lower triangular part of A is not
 *          referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular
 *          part of A contains the lower triangular part of the matrix
 *          A, and the strictly upper triangular part of A is not
 *          referenced.
 *          On exit, the lower triangle (if uplo = PlasmaLower) or the
 *          upper triangle (if uplo = PlasmaUpper) of A, including the
 *          diagonal, is destroyed.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[out] D
 *          On exit, the diagonal elements of the tridiagonal matrix:
 *          D(i) = A(i,i).
 *
 * @param[out] E
 *          On exit, the off-diagonal elements of the tridiagonal matrix:
 *          E(i) = A(i,i+1) if uplo = PlasmaUpper,
 *          E(i) = A(i+1,i) if uplo = PlasmaLower.
 *
 * @param[in, out] descT
 *          On entry, descriptor as returned by PLASMA_Alloc_Workspace_ssyev
 *          On exit, contains auxiliary factorization data.
 *
 * @param[out] Q
 *          On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
 *
 * @param[in] LDQ
 *          The leading dimension of the array Q. LDQ >= max(1,N).
 *          This is checked even when jobz = PlasmaNoVec.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *                  (-1 is also returned when jobz = PlasmaVec, because
 *                  computing the eigenvectors is not supported)
 *          \retval >0 if INFO = i, the algorithm failed to converge; i
 *                  off-diagonal elements of an intermediate tridiagonal
 *                  form did not converge to zero.
 *
 *******************************************************************************
 *
 * @sa PLASMA_ssytrd_Tile
 * @sa PLASMA_ssytrd_Tile_Async
 * @sa PLASMA_chetrd
 * @sa PLASMA_dsytrd
 * @sa PLASMA_ssytrd
 *
 ******************************************************************************/
int PLASMA_ssytrd(PLASMA_enum jobz, PLASMA_enum uplo, int N,
                  float *A, int LDA,
                  float *D,
                  float *E,
                  PLASMA_desc *descT,
                  float *Q, int LDQ)
{
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_error("PLASMA_ssytrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_SSYTRD, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ssytrd", "plasma_tune() failed");
        return status;
    }

    /* Set NT: number of NB-wide tiles needed to cover N (rounded up).
     * NT and IB are only needed to validate the shape of descT below. */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_ssytrd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_ssytrd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_ssytrd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ssytrd", "illegal value of LDA");
        return -5;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_ssytrd", "invalid T descriptor");
        return -8;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_ssytrd", "illegal value of LDQ");
        return -10;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Eigenvectors are rejected here, so every PlasmaVec branch below is
     * currently unreachable; they are kept for when support is added. */
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_ssytrd", "computing the eigenvectors is not supported in this version");
        return -1;
    }

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* If the Q conversion fails, release the tile copy of A as
             * well as Q so the error path does not leak descA. */
            plasma_sooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N,
                                 plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_siplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec)
            plasma_siplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N );
    }

    /* Call the tile interface */
    PLASMA_ssytrd_Tile_Async(jobz, uplo, &descA, D, E, descT, &descQ, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_sooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_sooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec)
            plasma_desc_mat_free(&descQ);
    } else {
        plasma_siptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec)
            plasma_siptile2lap( descQ, Q, NB, NB, LDQ, N );
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence object is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t
 *
 *  PLASMA_cgetrf - LU factorization of a general M-by-N matrix A, performed
 *  by the tile LU algorithm with partial tile pivoting and row interchanges.
 *
 *******************************************************************************
 *
 * @param[in] M
 *          Number of rows of A. M >= 0.
 *
 * @param[in] N
 *          Number of columns of A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix to factor.
 *          On exit, the tile factors L and U of the factorization.
 *
 * @param[in] LDA
 *          Leading dimension of the array A. LDA >= max(1,M).
 *
 * @param[out] IPIV
 *          Pivot indices defining the permutations. The first min(M,N)
 *          entries are zeroed before the factorization and adjusted
 *          afterwards by the row offset of the tile they belong to.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *          \retval >0 if i, U(i,i) is exactly zero. The factorization has
 *               been completed, but the factor U is exactly singular, and
 *               division by zero will occur if it is used to solve a
 *               system of equations.
 *
 *******************************************************************************
 *
 * @sa PLASMA_cgetrf_Tile
 * @sa PLASMA_cgetrf_Tile_Async
 * @sa PLASMA_cgetrf
 * @sa PLASMA_dgetrf
 * @sa PLASMA_sgetrf
 *
 ******************************************************************************/
int PLASMA_cgetrf(int M, int N,
                  PLASMA_Complex32_t *A, int LDA,
                  int *IPIV)
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    int NB, NBNB, minMN;
    int status;
    int tile, offset, count, k;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation; the return value is minus the argument index. */
    if (M < 0) {
        plasma_error("PLASMA_cgetrf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgetrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgetrf", "illegal value of LDA");
        return -4;
    }

    /* Nothing to factor when either dimension is empty
     * (M and N are known to be non-negative here). */
    if (M == 0 || N == 0)
        return PLASMA_SUCCESS;

    /* Select the tile size for this problem shape. */
    status = plasma_tune(PLASMA_FUNC_CGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrf", "plasma_tune() failed");
        return status;
    }

    NB    = PLASMA_NB;
    NBNB  = NB*NB;
    minMN = min(M, N);

    plasma_sequence_create(plasma, &sequence);

    /* The descriptor points directly at the caller's array A. */
    descA = plasma_desc_init(
        PlasmaComplexFloat, NB, NB, NBNB,
        LDA, N, 0, 0, M, N);
    descA.mat = A;

    memset(IPIV, 0, minMN * sizeof(*IPIV));

    /* Run the factorization and wait for it to finish. */
    plasma_dynamic_call_4(plasma_pcgetrf_reclap,
        PLASMA_desc, descA,
        int*, IPIV,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);

    plasma_dynamic_sync();

    /*
     * Post-process IPIV (ideally this would be done inside a task):
     * for every tile row after the first, add that tile row's starting
     * offset to each of its pivot entries, stopping at min(M,N).
     */
    for (tile = 1; tile < descA.mt; tile++) {
        offset = tile * descA.mb;
        count  = min(minMN - offset, descA.mb);
        if (count < 1)
            break;
        for (k = offset; k < offset + count; k++)
            IPIV[k] += offset;
    }

    /* Read the status before the sequence object is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zgesvd - computes the singular value decomposition (SVD) of a complex * M-by-N matrix A, optionally computing the left and/or right singular * vectors. The SVD is written * * A = U * SIGMA * conjugate-transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(m,n) diagonal elements, U is an M-by-M unitary matrix, and * V is an N-by-N unitary matrix. The diagonal elements of SIGMA * are the singular values of A; they are real and non-negative, and * are returned in descending order. The first min(m,n) columns of * U and V are the left and right singular vectors of A. * * Note that the routine returns V**H, not V. * Not LAPACK Compliant for now! * Note: Only PlasmaNoVec supported! ******************************************************************************* * * @param[in] jobu * Specifies options for computing all or part of the matrix U. * Intended usage: * = PlasmaVec: all M columns of U are returned in array U; * = PlasmaNoVec: no columns of U (no left singular vectors) are * computed. * Note: Only PlasmaNoVec supported! * * @param[in] jobvt * Specifies options for computing all or part of the matrix V**H. * Intended usage: * = PlasmaVec: all N rows of V**H are returned in array VT; * = PlasmaNoVec: no rows of V**H (no right singular vectors) are * computed. * Note: Only PlasmaNoVec supported! * * @param[in] M * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= 0. * * @param[in,out] A * On entry, the M-by-N matrix A. * On exit, * if JOBU = 'O', A is overwritten with the first min(m,n) * columns of U (the left singular vectors, * stored columnwise); * if JOBVT = 'O', A is overwritten with the first min(m,n) * rows of V**H (the right singular vectors, * stored rowwise); * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A * are destroyed. 
* * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[out] S * The double precision singular values of A, sorted so that S(i) >= S(i+1). * * @param[out] U * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. * If JOBU = 'A', U contains the M-by-M unitary matrix U; * if JOBU = 'S', U contains the first min(m,n) columns of U * (the left singular vectors, stored columnwise); * if JOBU = 'N' or 'O', U is not referenced. * * @param[in] LDU * The leading dimension of the array U. LDU >= 1; if * JOBU = 'S' or 'A', LDU >= M. * * @param[out] VT * If JOBVT = 'A', VT contains the N-by-N unitary matrix * V**H; * if JOBVT = 'S', VT contains the first min(m,n) rows of * V**H (the right singular vectors, stored rowwise); * if JOBVT = 'N' or 'O', VT is not referenced. * * @param[in] LDVT * The leading dimension of the array VT. LDVT >= 1; if * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). * * @param[in, out] descT * On entry, descriptor as returned by PLASMA_Alloc_Workspace_zgesvd * On exit, contains auxiliary factorization data. 
* * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zgesvd_Tile * @sa PLASMA_zgesvd_Tile_Async * @sa PLASMA_cgesvd * @sa PLASMA_dgesvd * @sa PLASMA_sgesvd * ******************************************************************************/ int PLASMA_zgesvd(PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex64_t *A, int LDA, double *S, PLASMA_Complex64_t *U, int LDU, PLASMA_Complex64_t *VT, int LDVT, PLASMA_desc *descT) { int NB, IB, IBNB, minMN, MT, NT, minMTNT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descU, descVT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgesvd", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on M & N; Set NBNB */ status = plasma_tune(PLASMA_FUNC_ZGESVD, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zgesvd", "plasma_tune() failed"); return status; } /* Set MT, NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? 
(N/NB) : (N/NB+1); minMN = min(M,N); minMTNT = min(MT,NT); /* Check input arguments */ if (jobu != PlasmaNoVec && jobu !=PlasmaVec) { plasma_error("PLASMA_zgesvd", "illegal value of jobu"); return -1; } if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) { plasma_error("PLASMA_zgesvd", "illegal value of jobvt"); return -2; } if (M < 0) { plasma_error("PLASMA_zgesvd", "illegal value of M"); return -3; } if (N < 0) { plasma_error("PLASMA_zgesvd", "illegal value of N"); return -4; } if (LDA < max(1, M)) { plasma_error("PLASMA_zgesvd", "illegal value of LDA"); return -6; } if (LDU < 1) { plasma_error("PLASMA_zgesvd", "illegal value of LDU"); return -9; } if (LDVT < 1) { plasma_error("PLASMA_zgesvd", "illegal value of LDVT"); return -11; } if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || ( descT->m != MT*IB ) || (descT->n != NT*NB) ) { plasma_error("PLASMA_zgesvd", "invalid T descriptor"); return -12; } /* Quick return */ if (min(M, N) == 0) { return PLASMA_SUCCESS; } if (jobu == PlasmaVec) { plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version"); return -1; } if (jobvt == PlasmaVec) { plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version"); return -2; } plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) ); if (jobu == PlasmaVec){ plasma_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU))); }
/***************************************************************************//**
 *
 * @ingroup double
 *
 *  PLASMA_dsygst - reduces a real symmetric-definite generalized
 *  eigenproblem to standard form.
 *  If itype == 1, the problem is A*x = lambda*B*x, and A is
 *  overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
 *  If itype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x
 *  = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.  B must
 *  have been previously factorized as U**T*U or L*L**T by
 *  PLASMA_DPOTRF.
 *
 *******************************************************************************
 *
 * @param[in] itype
 *          Intended usage:
 *          = 1: A*x=(lambda)*B*x
 *          = 2: A*B*x=(lambda)*x
 *          = 3: B*A*x=(lambda)*x
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or
 *          lower triangular:
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The order of the matrices A and B. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the symmetric matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular
 *          part of A contains the upper triangular part of the matrix
 *          A, and the strictly lower triangular part of A is not
 *          referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular
 *          part of A contains the lower triangular part of the matrix
 *          A, and the strictly upper triangular part of A is not
 *          referenced.
 *          On exit, if return value == 0, the transformed matrix,
 *          stored in the same format as A.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in,out] B
 *          On entry, the triangular factor from the Cholesky
 *          factorization of B, as returned by PLASMA_DPOTRF.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_dsygst_Tile
 * @sa PLASMA_dsygst_Tile_Async
 * @sa PLASMA_chegst
 * @sa PLASMA_dsygst
 * @sa PLASMA_ssygst
 *
 ******************************************************************************/
int PLASMA_dsygst(PLASMA_enum itype, PLASMA_enum uplo, int N,
                  double *A, int LDA,
                  double *B, int LDB)
{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsygst", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_dsygst", "Illegal value of itype");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dsygst", "Illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsygst", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsygst", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dsygst", "illegal value of LDB");
        return -7;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on N */
    status = plasma_tune(PLASMA_FUNC_DSYGST, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst", "plasma_tune() failed");
        return status;
    }

    /* Only the tile size is needed by the layout conversions below. */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
        /* If the conversion of B fails, the tile copy of A already exists
         * and must be released too, otherwise it leaks. */
        plasma_dooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
        plasma_diplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_dsygst_Tile_Async(itype, uplo, &descA, &descB, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_diptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }

    /* Read the status before the sequence object is destroyed. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}