/** ****************************************************************************
 *
 * @ingroup InPlaceTransformation
 *
 *  PLASMA_dgecfi - Converts the matrix A in place from format f_in to
 *  format f_out.
 *  Blocking wrapper: the conversion is submitted through
 *  PLASMA_dgecfi_Async() on a sequence created for this call, followed by
 *  plasma_dynamic_sync().
 *
 *******************************************************************************
 *
 * @param[in] m
 *         Number of rows of matrix A
 *
 * @param[in] n
 *         Number of columns of matrix A
 *
 * @param[in,out] A
 *         Matrix of size L*m*n
 *
 * @param[in] f_in
 *         Original format of the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] imb
 *         Number of rows of each block in original format
 *
 * @param[in] inb
 *         Number of columns of each block in original format
 *
 * @param[in] f_out
 *         Format requested for the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] omb
 *         Number of rows of each block in requested format
 *
 * @param[in] onb
 *         Number of columns of each block in requested format
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval otherwise, the status field of the sequence used for the call
 *
 *******************************************************************************
 *
 * @sa PLASMA_dgecfi_Async
 *
 ******************************************************************************/
int PLASMA_dgecfi(int m, int n, double *A,
                  PLASMA_enum f_in,  int imb, int inb,
                  PLASMA_enum f_out, int omb, int onb)
{
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma = plasma_context_self();

    if (plasma == NULL) {
        plasma_fatal_error(__func__, "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma_sequence_create(plasma, &sequence);

    /* Submit the conversion, then block until the dynamic section drains. */
    PLASMA_dgecfi_Async(m, n, A,
                        f_in,  imb, inb,
                        f_out, omb, onb,
                        sequence, &request);
    plasma_dynamic_sync();

    /* Read the outcome before the sequence object is released. */
    const int info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zgetri_Tile - Computes the inverse of a matrix using the LU
 *  factorization computed by PLASMA_zgetrf.
 *  This method inverts U and then computes inv(A) by solving the system
 *  inv(A)*L = inv(U) for inv(A).
 *  Tile equivalent of PLASMA_zgetri().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          On entry, the triangular factor L or U from the
 *          factorization A = P*L*U as computed by PLASMA_zgetrf.
 *          On exit, if return value = 0, the inverse of the original
 *          matrix A.
 *
 * @param[in] IPIV
 *          The pivot indices that define the permutations
 *          as returned by PLASMA_zgetrf.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval >0 if i, the (i,i) element of the factor U is
 *                exactly zero; The matrix is singular
 *                and its inverse could not be computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri
 * @sa PLASMA_zgetri_Tile_Async
 * @sa PLASMA_cgetri_Tile
 * @sa PLASMA_dgetri_Tile
 * @sa PLASMA_sgetri_Tile
 * @sa PLASMA_zgetrf_Tile
 *
 ******************************************************************************/
int PLASMA_zgetri_Tile(PLASMA_desc *A, int *IPIV)
{
    PLASMA_sequence *sequence = NULL;
    PLASMA_desc descW;
    plasma_context_t *plasma = plasma_context_self();

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma_sequence_create(plasma, &sequence);

    /* Workspace descriptor consumed by the asynchronous routine. */
    PLASMA_Alloc_Workspace_zgetri_Tile_Async(A, &descW);

    PLASMA_zgetri_Tile_Async(A, IPIV, &descW, sequence, &request);
    plasma_dynamic_sync();

    /* The workspace is released only after the synchronization above. */
    plasma_desc_mat_free(&descW);

    const int info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//** * * @ingroup float * * PLASMA_splgsy - Generate a random hermitian matrix by tiles. * ******************************************************************************* * * @param[in] bump * The value to add to the diagonal to be sure * to have a positive definite matrix. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[out] A * On exit, The random hermitian matrix A generated. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[in] seed * The seed used in the random generation. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_splgsy_Tile * @sa PLASMA_splgsy_Tile_Async * @sa PLASMA_cplgsy * @sa PLASMA_dplgsy * @sa PLASMA_splgsy * @sa PLASMA_splrnt * @sa PLASMA_splgsy * ******************************************************************************/ int PLASMA_splgsy( float bump, int N, float *A, int LDA, unsigned long long int seed ) { int NB; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_splgsy", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (N < 0) { plasma_error("PLASMA_splgsy", "illegal value of N"); return -2; } if (LDA < max(1, N)) { plasma_error("PLASMA_splgsy", "illegal value of LDA"); return -4; } /* Quick return */ if (max(0, N) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_SGEMM, N, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_splgsy", "plasma_tune() failed"); return status; } /* Set NT */ 
NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); descA = plasma_desc_init( PlasmaRealFloat, NB, NB, NB*NB, LDA, N, 0, 0, N, N); descA.mat = A; /* Call the tile interface */ PLASMA_splgsy_Tile_Async( bump, &descA, seed, sequence, &request ); plasma_siptile2lap( descA, A, NB, NB, LDA, N ); plasma_dynamic_sync(); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zlansy_Tile - Tile equivalent of PLASMA_zlansy().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] norm
 *          = PlasmaMaxNorm: Max norm
 *          = PlasmaOneNorm: One norm
 *          = PlasmaInfNorm: Infinity norm
 *          = PlasmaFrobeniusNorm: Frobenius norm
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] A
 *          Descriptor of the matrix whose norm is requested. It is handed
 *          unchanged to PLASMA_zlansy_Tile_Async().
 *
 *******************************************************************************
 *
 * @return
 *          \retval the requested norm when the internal sequence finishes with
 *                PLASMA_SUCCESS
 *          \retval PLASMA_ERR_NOT_INITIALIZED (converted to double) if
 *                plasma_context_self() returns NULL
 *          \retval the sequence status (converted to double) when the
 *                sequence does not finish with PLASMA_SUCCESS
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlansy
 * @sa PLASMA_zlansy_Tile_Async
 * @sa PLASMA_clansy_Tile
 * @sa PLASMA_dlansy_Tile
 * @sa PLASMA_slansy_Tile
 *
 ******************************************************************************/
double PLASMA_zlansy_Tile(PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc *A)
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    /* Initialized so that the function never returns an indeterminate value
     * should the asynchronous routine return without storing a result. */
    double value = 0.0;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlansy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    plasma_sequence_create(plasma, &sequence);

    PLASMA_zlansy_Tile_Async(norm, uplo, A, &value, sequence, &request);
    plasma_dynamic_sync();

    /* Capture the outcome before the sequence is released. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    /* On failure report the status instead of a meaningless norm; this is the
     * same "error code as double" convention used by the early return above. */
    if (status != PLASMA_SUCCESS) {
        return (double)status;
    }
    return value;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zlaswp_Tile - performs a series of row interchanges on the matrix A.
 *  One row interchange is initiated for each of rows K1 through K2 of A.
 *  Tile equivalent of PLASMA_zlaswp().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          The tile factors L and U from the factorization, computed by
 *          PLASMA_zgetrf. The row interchanges are applied to this matrix.
 *
 * @param[in] K1
 *          The first element of IPIV for which a row interchange will
 *          be done.
 *
 * @param[in] K2
 *          The last element of IPIV for which a row interchange will
 *          be done.
 *
 * @param[in] IPIV
 *          The pivot indices from PLASMA_zgetrf.
 *
 * @param[in] INCX
 *          The increment between successive values of IPIV. If INCX
 *          is negative, the pivots are applied in reverse order.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval otherwise, the status field of the sequence used for the call
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlaswp
 * @sa PLASMA_zlaswp_Tile_Async
 * @sa PLASMA_claswp_Tile
 * @sa PLASMA_dlaswp_Tile
 * @sa PLASMA_slaswp_Tile
 * @sa PLASMA_zgetrf_Tile
 *
 ******************************************************************************/
int PLASMA_zlaswp_Tile(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX)
{
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma = plasma_context_self();

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma_sequence_create(plasma, &sequence);

    /* Submit the swaps and wait for them. */
    PLASMA_zlaswp_Tile_Async(A, K1, K2, IPIV, INCX, sequence, &request);
    plasma_dynamic_sync();

    const int info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t_Tile
 *
 *  PLASMA_cpotrf_Tile - Computes the Cholesky factorization of a symmetric
 *  positive definite or Hermitian positive definite matrix.
 *  Tile equivalent of PLASMA_cpotrf().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in,out] A
 *          On entry, the symmetric positive definite (or Hermitian) matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A
 *          contains the upper triangular part of the matrix A, and the strictly
 *          lower triangular part of A is not referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular part of A
 *          contains the lower triangular part of the matrix A, and the strictly
 *          upper triangular part of A is not referenced.
 *          On exit, if return value = 0, the factor U or L from the Cholesky
 *          factorization A = U**H*U or A = L*L**H.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval >0 if i, the leading minor of order i of A is not positive
 *                definite, so the factorization could not be completed, and the
 *                solution has not been computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_cpotrf
 * @sa PLASMA_cpotrf_Tile_Async
 * @sa PLASMA_zpotrf_Tile
 * @sa PLASMA_dpotrf_Tile
 * @sa PLASMA_spotrf_Tile
 * @sa PLASMA_cpotrs_Tile
 *
 ******************************************************************************/
int PLASMA_cpotrf_Tile(PLASMA_enum uplo, PLASMA_desc *A)
{
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma = plasma_context_self();

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma_sequence_create(plasma, &sequence);

    /* Submit the factorization and wait for it. */
    PLASMA_cpotrf_Tile_Async(uplo, A, sequence, &request);
    plasma_dynamic_sync();

    const int info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//**
 *
 * @ingroup float_Tile
 *
 *  PLASMA_splgsy_Tile - Generate a random symmetric (Hermitian in the complex
 *  variants) matrix by tiles.
 *  Tile equivalent of PLASMA_splgsy().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] bump
 *          The value to add to the diagonal to be sure
 *          to have a positive definite matrix.
 *
 * @param[in,out] A
 *          Descriptor of the matrix to fill.
 *          On exit, the random matrix A generated.
 *
 * @param[in] seed
 *          The seed used in the random generation.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval otherwise, the status field of the sequence used for the call
 *
 *******************************************************************************
 *
 * @sa PLASMA_splgsy
 * @sa PLASMA_splgsy_Tile_Async
 * @sa PLASMA_cplgsy_Tile
 * @sa PLASMA_dplgsy_Tile
 * @sa PLASMA_splrnt_Tile
 *
 ******************************************************************************/
int PLASMA_splgsy_Tile( float bump, PLASMA_desc *A,
                        unsigned long long int seed )
{
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma = plasma_context_self();

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    plasma_sequence_create(plasma, &sequence);

    /* Submit the generation and wait for it. */
    PLASMA_splgsy_Tile_Async( bump, A, seed, sequence, &request );
    plasma_dynamic_sync();

    const int info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//**
 *
 * @ingroup float
 *
 *  PLASMA_ssytrd - reduces a symmetric (Hermitian in the complex variants)
 *  matrix A to real symmetric tridiagonal form S using a two-stage approach
 *  First stage: reduction to band tridiagonal form (unitary Q1);
 *  Second stage: reduction from band to tridiagonal form (unitary
 *  Q2).  Let Q = Q1 * Q2 be the global unitary transformation; Q**T *
 *  A * Q = S.
 *  Not LAPACK compliant as A does not contain the T elements
 *  Note: Only PlasmaNoVec supported!
 *
 *******************************************************************************
 *
 * @param[in] jobz
 *          Intended usage:
 *          = PlasmaNoVec: computes eigenvalues only;
 *          = PlasmaVec: computes eigenvalues and eigenvectors.
 *          Note: Only PlasmaNoVec supported! PlasmaVec passes the argument
 *          checks but is then rejected with return value -1.
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or
 *          lower triangular:
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the symmetric (or Hermitian) matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular
 *          part of A contains the upper triangular part of the matrix
 *          A, and the strictly lower triangular part of A is not
 *          referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular
 *          part of A contains the lower triangular part of the matrix
 *          A, and the strictly upper triangular part of A is not
 *          referenced.
 *          On exit, the lower triangle (if uplo = PlasmaLower) or the
 *          upper triangle (if uplo = PlasmaUpper) of A, including the
 *          diagonal, is destroyed.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[out] D
 *          On exit, the diagonal elements of the tridiagonal matrix:
 *          D(i) = A(i,i).
 *
 * @param[out] E
 *          On exit, the off-diagonal elements of the tridiagonal matrix:
 *          E(i) = A(i,i+1) if uplo = PlasmaUpper,
 *          E(i) = A(i+1,i) if uplo = PlasmaLower.
 *
 * @param[in,out] descT
 *          On entry, descriptor as returned by PLASMA_Alloc_Workspace_ssyev.
 *          It must pass plasma_desc_check() and have dimensions
 *          (NT*IB)-by-(NT*NB), with NT = ceil(N/NB).
 *          On exit, contains auxiliary factorization data.
 *
 * @param[out] Q
 *          On exit, if jobz = PlasmaVec and info = 0, the eigenvectors.
 *
 * @param[in] LDQ
 *          The leading dimension of the array Q. LDQ >= max(1,N).
 *          This bound is checked whatever the value of jobz.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval <0 if -i, the i-th argument had an illegal value
 *          \retval -1 also when jobz = PlasmaVec (not supported in this version)
 *          \retval >0 if INFO = i, the algorithm failed to converge; i
 *               off-diagonal elements of an intermediate tridiagonal
 *               form did not converge to zero.
 *
 *******************************************************************************
 *
 * @sa PLASMA_ssytrd_Tile
 * @sa PLASMA_ssytrd_Tile_Async
 * @sa PLASMA_chetrd
 * @sa PLASMA_dsytrd
 * @sa PLASMA_zhetrd
 *
 ******************************************************************************/
int PLASMA_ssytrd(PLASMA_enum jobz, PLASMA_enum uplo, int N,
                  float *A, int LDA,
                  float *D,
                  float *E,
                  PLASMA_desc *descT,
                  float *Q, int LDQ)
{
    int NB, IB, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descQ;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        /* Reported with plasma_fatal_error(), as every other entry point of
         * this file does when no context is available. */
        plasma_fatal_error("PLASMA_ssytrd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tuning happens before the argument checks because the descT check
     * below needs NB and IB. */
    status = plasma_tune(PLASMA_FUNC_SSYTRD, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ssytrd", "plasma_tune() failed");
        return status;
    }

    /* Tile sizes and number of tiles per dimension (ceiling of N/NB). */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_ssytrd", "illegal value of jobz");
        return -1;
    }
    if (uplo != PlasmaLower && uplo != PlasmaUpper) {
        plasma_error("PLASMA_ssytrd", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_ssytrd", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_ssytrd", "illegal value of LDA");
        return -5;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
         ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_ssytrd", "invalid T descriptor");
        return -8;
    }
    if (LDQ < max(1, N)) {
        plasma_error("PLASMA_ssytrd", "illegal value of LDQ");
        return -10;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Eigenvector computation is rejected here, so every
     * `jobz == PlasmaVec` branch below is currently unreachable; the
     * branches are kept for when the feature is enabled. */
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_ssytrd", "computing the eigenvectors is not supported in this version");
        return -1;
    }

    plasma_sequence_create(plasma, &sequence);

    /* Every helper-macro call below sits inside braces: should one of these
     * project macros expand to more than one statement, the whole expansion
     * must stay under the governing condition. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
        if (jobz == PlasmaVec) {
            /* NOTE(review): the cleanup argument releases descQ only; confirm
             * whether descA should also be released on this failure path. */
            plasma_sooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N , plasma_desc_mat_free(&(descQ)) );
        }
    } else {
        plasma_siplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
        if (jobz == PlasmaVec) {
            plasma_siplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N );
        }
    }

    /* Call the tile interface */
    PLASMA_ssytrd_Tile_Async(jobz, uplo, &descA, D, E, descT, &descQ, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_sooptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_sooptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        if (jobz == PlasmaVec) {
            plasma_desc_mat_free(&descQ);
        }
    } else {
        plasma_siptile2lap( descA, A, NB, NB, LDA, N );
        if (jobz == PlasmaVec) {
            plasma_siptile2lap( descQ, Q, NB, NB, LDQ, N );
        }
        plasma_dynamic_sync();
    }

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
/***************************************************************************//** * * @ingroup Auxiliary * * PLASMA_Finalize - Finalize PLASMA. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************/ int PLASMA_Finalize() { int core; int status; void *exitcodep; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_Finalize()", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Terminate the dynamic scheduler */ plasma_dynamic_sync(); /* Free quark structures */ QUARK_Free(plasma->quark); /* Set termination action */ pthread_mutex_lock(&plasma->action_mutex); plasma->action = PLASMA_ACT_FINALIZE; pthread_mutex_unlock(&plasma->action_mutex); pthread_cond_broadcast(&plasma->action_condt); /* Barrier and clear action */ plasma_barrier(plasma); plasma->action = PLASMA_ACT_STAND_BY; // Join threads for (core = 1; core < plasma->world_size; core++) { status = pthread_join(plasma->thread_id[core], &exitcodep); if (status != 0) { plasma_fatal_error("PLASMA_Finalize", "pthread_join() failed"); return status; } } plasma_barrier_finalize(plasma); plasma_barrier_bw_finalize(plasma); /* Unbind main thread */ plasma_unsetaffinity(); /* Destroy thread attributes */ status = pthread_attr_destroy(&plasma->thread_attr); if (status != 0) plasma_fatal_error("PLASMA_Finalize", "pthread_attr_destroy() failed"); /* Destroy topology */ plasma_topology_finalize(); status = plasma_context_remove(plasma, pthread_self()); if (status != PLASMA_SUCCESS) { plasma_fatal_error("PLASMA_Finalize", "plasma_context_remove() failed"); return status; } /* Restore the concurency */ /* actually it's really bad, we shoulde set the concurrency only * if it's not already done and restore it only we had change it */ pthread_setconcurrency( 0 ); return PLASMA_SUCCESS; }
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zgetri - Computes the inverse of a matrix using the LU factorization
 *  computed by PLASMA_zgetrf.
 *  This method inverts U and then computes inv(A) by solving the system
 *  inv(A)*L = inv(U) for inv(A).
 *
 *******************************************************************************
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the triangular factor L or U from the
 *          factorization A = P*L*U as computed by PLASMA_zgetrf.
 *          On exit, if return value = 0, the inverse of the original
 *          matrix A.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in] IPIV
 *          The pivot indices that define the permutations
 *          as returned by PLASMA_zgetrf.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval <0 if -i, the i-th argument had an illegal value
 *                (-1 for N, -3 for LDA)
 *          \retval >0 if i, the (i,i) element of the factor U is
 *                exactly zero; The matrix is singular
 *                and its inverse could not be computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri_Tile
 * @sa PLASMA_zgetri_Tile_Async
 * @sa PLASMA_cgetri
 * @sa PLASMA_dgetri
 * @sa PLASMA_sgetri
 * @sa PLASMA_zgetrf
 *
 ******************************************************************************/
int PLASMA_zgetri(int N,
                  PLASMA_Complex64_t *A, int LDA,
                  int *IPIV)
{
    /* NOTE: `plasma`, `sequence` and `request` keep their names because the
     * project helper macros used below may refer to them implicitly. */
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    PLASMA_desc descW;
    int NB;
    plasma_context_t *plasma = plasma_context_self();

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation (codes are minus the argument position). */
    if (N < 0) {
        plasma_error("PLASMA_zgetri", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgetri", "illegal value of LDA");
        return -3;
    }

    /* Empty matrix: nothing to invert (N is known to be >= 0 here). */
    if (N == 0) {
        return PLASMA_SUCCESS;
    }

    /* Select the tile size for an N-by-N problem. */
    int info = plasma_tune(PLASMA_FUNC_ZGESV, N, N, 0);
    if (info != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri", "plasma_tune() failed");
        return info;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Bring A into tile layout: either through a separate tile buffer
     * (out-of-place) or by converting the caller's array in place. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request,
                             plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                            sequence, &request);
    }

    /* Workspace descriptor consumed by the asynchronous routine. */
    PLASMA_Alloc_Workspace_zgetri_Tile_Async(&descA, &descW);

    /* Tile interface */
    PLASMA_zgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);

    /* Convert the result back, wait, and release the tile copy if any. */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N,
                            sequence, &request);
        plasma_dynamic_sync();
    } else {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N,
                             sequence, &request);
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    }
    plasma_desc_mat_free(&descW);

    info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t
 *
 *  PLASMA_cgetrf - Computes an LU factorization of a general M-by-N matrix A
 *  using the tile LU algorithm with partial tile pivoting with row interchanges.
 *
 *******************************************************************************
 *
 * @param[in] M
 *          The number of rows of the matrix A. M >= 0.
 *
 * @param[in] N
 *          The number of columns of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix to be factored.
 *          On exit, the tile factors L and U from the factorization.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,M).
 *
 * @param[out] IPIV
 *          The pivot indices that define the permutations.
 *          The first min(M,N) entries are zeroed before the factorization.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval <0 if -i, the i-th argument had an illegal value
 *                (-1 for M, -2 for N, -4 for LDA)
 *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been
 *               completed, but the factor U is exactly singular, and division
 *               by zero will occur if it is used to solve a system of equations.
 *
 *******************************************************************************
 *
 * @sa PLASMA_cgetrf_Tile
 * @sa PLASMA_cgetrf_Tile_Async
 * @sa PLASMA_zgetrf
 * @sa PLASMA_dgetrf
 * @sa PLASMA_sgetrf
 *
 ******************************************************************************/
int PLASMA_cgetrf(int M, int N,
                  PLASMA_Complex32_t *A, int LDA,
                  int *IPIV)
{
    /* NOTE: `plasma`, `sequence` and `request` keep their names because the
     * project helper macros used below may refer to them implicitly. */
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    int NB;
    plasma_context_t *plasma = plasma_context_self();

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgetrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation (codes are minus the argument position). */
    if (M < 0) {
        plasma_error("PLASMA_cgetrf", "illegal value of M");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cgetrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgetrf", "illegal value of LDA");
        return -4;
    }

    /* Empty matrix: nothing to factor (M and N are known to be >= 0 here). */
    if ((M == 0) || (N == 0)) {
        return PLASMA_SUCCESS;
    }

    /* Select the tile sizes for an M-by-N problem. */
    int info = plasma_tune(PLASMA_FUNC_CGESV, M, N, 0);
    if (info != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgetrf", "plasma_tune() failed");
        return info;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* The descriptor is laid directly over the caller's array: no copy. */
    descA = plasma_desc_init(PlasmaComplexFloat,
                             NB, NB, NB*NB,
                             LDA, N,
                             0, 0,
                             M, N);
    descA.mat = A;

    const int minMN = min(M, N);
    memset(IPIV, 0, minMN*sizeof(int));

    /* Tile interface */
    plasma_dynamic_call_4(plasma_pcgetrf_reclap,
        PLASMA_desc, descA,
        int*, IPIV,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);

    plasma_dynamic_sync();

    /*
     * Generate the correct IPIV (ought to be moved into a task).
     * Entries belonging to tile row r >= 1 are increased by r*mb;
     * presumably the kernel stores them relative to the start of their tile
     * row -- confirm against plasma_pcgetrf_reclap.
     */
    for (int row = 1; row < descA.mt; row++) {
        const int shift = row * descA.mb;
        const int count = min(minMN - shift, descA.mb);

        if (count < 1) {
            break;
        }

        int *pivots = IPIV + shift;
        for (int k = 0; k < count; k++) {
            pivots[k] += shift;
        }
    }

    info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zlansy returns the value
 *
 *     zlansy = ( max(abs(A(i,j))), NORM = PlasmaMaxNorm
 *              (
 *              ( norm1(A),         NORM = PlasmaOneNorm
 *              (
 *              ( normI(A),         NORM = PlasmaInfNorm
 *              (
 *              ( normF(A),         NORM = PlasmaFrobeniusNorm
 *
 *  where norm1 denotes the one norm of a matrix (maximum column sum),
 *  normI denotes the infinity norm of a matrix (maximum row sum) and
 *  normF denotes the Frobenius norm of a matrix (square root of sum
 *  of squares). Note that max(abs(A(i,j))) is not a consistent matrix
 *  norm.
 *
 *******************************************************************************
 *
 * @param[in] norm
 *          = PlasmaMaxNorm: Max norm
 *          = PlasmaOneNorm: One norm
 *          = PlasmaInfNorm: Infinity norm
 *          = PlasmaFrobeniusNorm: Frobenius norm
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The number of columns/rows of the matrix A. N >= 0. When N = 0,
 *          the returned value is set to zero.
 *
 * @param[in] A
 *          The N-by-N matrix A.
 *          In the in-place translation mode the array is converted to tile
 *          layout and converted back before the function returns.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval the norm described above when the computation succeeds.
 *          \retval <0 (converted to double) on error: -i if the i-th argument
 *                had an illegal value (-1 norm, -2 uplo, -3 N, -5 LDA),
 *                PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns
 *                NULL, the plasma_tune() status if tuning fails, or the
 *                sequence status if the sequence does not finish with
 *                PLASMA_SUCCESS.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlansy_Tile
 * @sa PLASMA_zlansy_Tile_Async
 * @sa PLASMA_clansy
 * @sa PLASMA_dlansy
 * @sa PLASMA_slansy
 *
 ******************************************************************************/
double PLASMA_zlansy(PLASMA_enum norm, PLASMA_enum uplo, int N,
                     PLASMA_Complex64_t *A, int LDA)
{
    int NB;
    int status;
    /* Initialized so that the function never returns an indeterminate value
     * should the asynchronous routine return without storing a result. */
    double value = 0.0;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlansy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
        && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_zlansy", "illegal value of norm");
        return -1;
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlansy", "illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_zlansy", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zlansy", "illegal value of LDA");
        return -5;
    }

    /* Quick return: the norm of an empty matrix is zero. */
    if ( N == 0)
        return (double)0.0;

    /* Select the tile size for an N-by-N problem. */
    status = plasma_tune(PLASMA_FUNC_ZGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlansy", "plasma_tune() failed");
        return status;
    }

    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Bring A into tile layout: either through a separate tile buffer
     * (out-of-place) or by converting the caller's array in place. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request,
                             plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                            sequence, &request);
    }

    /* Call the tile interface */
    PLASMA_zlansy_Tile_Async(norm, uplo, &descA, &value, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        /* A is input only: the tile copy is simply discarded. */
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        /* The caller's array was converted in place: convert it back. */
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);
        plasma_dynamic_sync();
    }

    /* Capture the outcome before the sequence is released. */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    /* On failure report the status instead of a meaningless norm; this is the
     * same "error code as double" convention used by the early returns. */
    if (status != PLASMA_SUCCESS) {
        return (double)status;
    }
    return value;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t
 *
 *  PLASMA_cpotrf - Computes the Cholesky factorization of a symmetric positive
 *  definite (or Hermitian positive definite in the complex case) matrix A.
 *  The factorization has the form
 *
 *    \f[ A = \{_{L\times L^H, if uplo = PlasmaLower}^{U^H\times U, if uplo = PlasmaUpper} \f]
 *
 *  where U is an upper triangular matrix and L is a lower triangular matrix.
 *
 *******************************************************************************
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the symmetric positive definite (or Hermitian) matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A
 *          contains the upper triangular part of the matrix A, and the strictly
 *          lower triangular part of A is not referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular part of A
 *          contains the lower triangular part of the matrix A, and the strictly
 *          upper triangular part of A is not referenced.
 *          On exit, if return value = 0, the factor U or L from the Cholesky
 *          factorization A = U**H*U or A = L*L**H.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if plasma_context_self() returns NULL
 *          \retval <0 if -i, the i-th argument had an illegal value
 *                (-1 for uplo, -2 for N, -4 for LDA)
 *          \retval >0 if i, the leading minor of order i of A is not positive
 *               definite, so the factorization could not be completed, and the
 *               solution has not been computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_cpotrf_Tile
 * @sa PLASMA_cpotrf_Tile_Async
 * @sa PLASMA_zpotrf
 * @sa PLASMA_dpotrf
 * @sa PLASMA_spotrf
 * @sa PLASMA_cpotrs
 *
 ******************************************************************************/
int PLASMA_cpotrf(PLASMA_enum uplo, int N,
                  PLASMA_Complex32_t *A, int LDA)
{
    /* NOTE: `plasma`, `sequence` and `request` keep their names because the
     * project helper macros used below may refer to them implicitly. */
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    int NB;
    plasma_context_t *plasma = plasma_context_self();

    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrf", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Argument validation (codes are minus the argument position). */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_cpotrf", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_cpotrf", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_cpotrf", "illegal value of LDA");
        return -4;
    }

    /* Empty matrix: nothing to factor (N is known to be >= 0 here). */
    if (N == 0) {
        return PLASMA_SUCCESS;
    }

    /* Select the tile size for an N-by-N problem. */
    int info = plasma_tune(PLASMA_FUNC_CPOSV, N, N, 0);
    if (info != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cpotrf", "plasma_tune() failed");
        return info;
    }
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Bring A into tile layout: either through a separate tile buffer
     * (out-of-place) or by converting the caller's array in place. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
    }

    /* Tile interface */
    PLASMA_cpotrf_Tile_Async(uplo, &descA, sequence, &request);

    /* Convert the factor back, wait, and release the tile copy if any. */
    if ( PLASMA_TRANSLATION != PLASMA_OUTOFPLACE ) {
        plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
    } else {
        plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    }

    info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//** * * @ingroup float * * PLASMA_sgelqf - Computes the tile LQ factorization of a complex M-by-N matrix A: A = L * Q. * ******************************************************************************* * * @param[in] M * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= 0. * * @param[in,out] A * On entry, the M-by-N matrix A. * On exit, the elements on and below the diagonal of the array contain the m-by-min(M,N) * lower trapezoidal matrix L (L is lower triangular if M <= N); the elements above the * diagonal represent the unitary matrix Q as a product of elementary reflectors, stored * by tiles. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[out] T * On exit, auxiliary factorization data, required by PLASMA_sgelqs to solve the system * of equations. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_sgelqf_Tile * @sa PLASMA_sgelqf_Tile_Async * @sa PLASMA_cgelqf * @sa PLASMA_dgelqf * @sa PLASMA_sgelqf * @sa PLASMA_sgelqs * ******************************************************************************/ int PLASMA_sgelqf(int M, int N, float *A, int LDA, float *T) { int NB, IB, IBNB, MT, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_sgelqf", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (M < 0) { plasma_error("PLASMA_sgelqf", "illegal value of M"); return -1; } if (N < 0) { plasma_error("PLASMA_sgelqf", "illegal value of N"); 
return -2; } if (LDA < max(1, M)) { plasma_error("PLASMA_sgelqf", "illegal value of LDA"); return -4; } /* Quick return */ if (min(M, N) == 0) return PLASMA_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_SGELS, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_sgelqf", "plasma_tune() failed"); return status; } /* Set MT & NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); plasma_sequence_create(plasma, &sequence); if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { descT = plasma_desc_init( PlasmaRealFloat, IB, NB, IBNB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); } else { /* Double the size of T to accomodate the tree reduction phase */ descT = plasma_desc_init( PlasmaRealFloat, IB, NB, IBNB, MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB); } descT.mat = T; if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) ); } else { plasma_siplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N); } /* Call the tile interface */ PLASMA_sgelqf_Tile_Async(&descA, &descT, sequence, &request); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooptile2lap( descA, A, NB, NB, LDA, N ); plasma_dynamic_sync(); plasma_desc_mat_free(&descA); } else { plasma_siptile2lap( descA, A, NB, NB, LDA, N ); plasma_dynamic_sync(); } status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//**
 *
 * @ingroup double
 *
 *  PLASMA_dsygst - reduces a real symmetric-definite generalized
 *  eigenproblem to standard form.
 *  If itype == 1, the problem is A*x = lambda*B*x, and A is
 *  overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
 *  If itype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x
 *  = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must
 *  have been previously factorized as U**T*U or L*L**T by
 *  PLASMA_DPOTRF.
 *
 *******************************************************************************
 *
 * @param[in] itype
 *          Intended usage:
 *          = 1: A*x=(lambda)*B*x
 *          = 2: A*Bx=(lambda)*x
 *          = 3: B*A*x=(lambda)*x
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or
 *          lower triangular:
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The order of the matrices A and B. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the symmetric matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular
 *          part of A contains the upper triangular part of the matrix
 *          A, and the strictly lower triangular part of A is not
 *          referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular
 *          part of A contains the lower triangular part of the matrix
 *          A, and the strictly upper triangular part of A is not
 *          referenced.
 *          On exit, if return value == 0, the transformed matrix,
 *          stored in the same format as A.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in,out] B
 *          On entry, the triangular factor from the Cholesky
 *          factorization of B, as returned by PLASMA_DPOTRF.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_dsygst_Tile
 * @sa PLASMA_dsygst_Tile_Async
 * @sa PLASMA_chegst
 * @sa PLASMA_dsygst
 * @sa PLASMA_ssygst
 *
 ******************************************************************************/
int PLASMA_dsygst(PLASMA_enum itype, PLASMA_enum uplo, int N,
                  double *A, int LDA,
                  double *B, int LDB)
{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsygst", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; a return of -i flags the i-th argument */
    if (itype != 1 && itype != 2 && itype != 3) {
        plasma_error("PLASMA_dsygst", "Illegal value of itype");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dsygst", "Illegal value of uplo");
        return -2;
    }
    if (N < 0) {
        plasma_error("PLASMA_dsygst", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dsygst", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dsygst", "illegal value of LDB");
        return -7;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Let plasma_tune() set the tuning parameters for an N-by-N problem */
    status = plasma_tune(PLASMA_FUNC_DSYGST, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst", "plasma_tune() failed");
        return status;
    }

    /* Tile size; the only tuning value this driver needs */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Set up descA and descB according to the configured translation mode.
     * The last argument of each out-of-place helper is the cleanup statement
     * handed to it. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N,
                             plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
    } else {
        plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
        plasma_diplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N);
    }

    /* Call the tile interface */
    PLASMA_dsygst_Tile_Async(itype, uplo, &descA, &descB, sequence, &request);

    /* Convert both matrices back, wait for completion, and release the
     * descriptor storage in out-of-place mode */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
        plasma_dooptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
        plasma_desc_mat_free(&descB);
    } else {
        plasma_diptile2lap( descA, A, NB, NB, LDA, N );
        plasma_diptile2lap( descB, B, NB, NB, LDB, N );
        plasma_dynamic_sync();
    }

    /* Read the outcome before the sequence object is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}