/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zlaswp_Tile_Async - performs a series of row interchanges
 *  on the matrix A. One row interchange is initiated for each of
 *  rows K1 through K2 of A.
 *  Non-blocking equivalent of PLASMA_zlaswp_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          Descriptor of the matrix whose rows are interchanged. It is
 *          validated with plasma_desc_check() before being copied.
 *
 * @param[in] K1
 *          The first element of IPIV for which a row interchange will be
 *          done. Only K1 == 1 is accepted.
 *
 * @param[in] K2
 *          The last element of IPIV for which a row interchange will be
 *          done. Only K2 == A->m is accepted.
 *
 * @param[in] IPIV
 *          The pivot indices, forwarded to plasma_pzlaswp.
 *
 * @param[in] INCX
 *          The increment between successive values of IPIV, forwarded to
 *          plasma_pzlaswp.
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS the tasks were submitted
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval PLASMA_ERR_UNALLOCATED NULL sequence or request
 *          \retval other the value returned by plasma_request_fail()
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlaswp
 * @sa PLASMA_zlaswp_Tile
 * @sa PLASMA_claswp_Tile_Async
 * @sa PLASMA_dlaswp_Tile_Async
 * @sa PLASMA_slaswp_Tile_Async
 * @sa PLASMA_zgetrf_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zlaswp_Tile_Async(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness; the descriptor is only copied once
     * it has been validated, so *A is not read before the checks above. */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    descA = *A;

    if ( (K1 != 1) || (K2 != descA.m) ) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid K1 or K2 (1..M is the only interval supported right now)");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    plasma_dynamic_call_3(
        plasma_pzbarrier_tl2pnl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    /* swap */
    plasma_dynamic_call_5(
        plasma_pzlaswp,
        PLASMA_desc, descA,
        int *,       IPIV,
        int,         INCX,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    plasma_dynamic_call_3(
        plasma_pzbarrier_pnl2tl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    return PLASMA_SUCCESS;
}
/***************************************************************************//** * * @ingroup double_Tile_Async * * PLASMA_dsygst_Tile_Async - reduces a complex Hermitian-definite * generalized eigenproblem to standard form. * If PlasmaItype == 1, the problem is A*x = lambda*B*x, and A is * overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) * If PlasmaItype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x * = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must * have been previously factorized as U**T*U or L*L**T by * PLASMA_DPOTRF. * ONLY PlasmaItype == 1 and PlasmaLower supported! * Non-blocking equivalent of PLASMA_dsygst_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). 
* ******************************************************************************* * * @sa PLASMA_dsygst * @sa PLASMA_dsygst_Tile * @sa PLASMA_chegst_Tile_Async * @sa PLASMA_dsygst_Tile_Async * @sa PLASMA_ssygst_Tile_Async * @sa PLASMA_dsygv_Tile_Async * ******************************************************************************/ int PLASMA_dsygst_Tile_Async(PLASMA_enum itype, PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; PLASMA_desc descB = *B; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_dsygst_Tile", "invalid first descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (plasma_desc_check(&descB) != PLASMA_SUCCESS) { plasma_error("PLASMA_dsygst_Tile", "invalid second descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_dsygst_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* * Transform Hermitian-definite generalized eigenproblem * to standard form */ plasma_dynamic_call_6(plasma_pdsygst, PLASMA_enum, itype, PLASMA_enum, uplo, PLASMA_desc, descA, PLASMA_desc, descB, PLASMA_sequence*, sequence, PLASMA_request*, 
request); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup PLASMA_Complex32_t_Tile_Async * * PLASMA_cpotrf_Tile_Async - Computes the Cholesky factorization of a symmetric * positive definite or Hermitian positive definite matrix. * Non-blocking equivalent of PLASMA_cpotrf_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_cpotrf * @sa PLASMA_cpotrf_Tile * @sa PLASMA_cpotrf_Tile_Async * @sa PLASMA_dpotrf_Tile_Async * @sa PLASMA_spotrf_Tile_Async * @sa PLASMA_cpotrs_Tile_Async * ******************************************************************************/ int PLASMA_cpotrf_Tile_Async(PLASMA_enum uplo, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_cpotrf_Tile", "invalid descriptor"); return 
plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_cpotrf_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_cpotrf_Tile", "illegal value of uplo"); return plasma_request_fail(sequence, request, -1); } /* Quick return */ /* if (max(N, 0) == 0) return PLASMA_SUCCESS; */ plasma_parallel_call_4(plasma_pcpotrf, PLASMA_enum, uplo, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//**
 *
 * @ingroup float_Tile_Async
 *
 *  PLASMA_splgsy_Tile_Async - Generate a random symmetric matrix by tiles.
 *  Non-blocking equivalent of PLASMA_splgsy_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] bump
 *          Value forwarded to plasma_psplgsy.
 *
 * @param[out] A
 *          Descriptor of the matrix to generate; must use square tiles
 *          (nb == mb).
 *
 * @param[in] seed
 *          Seed forwarded to plasma_psplgsy.
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS the computation was launched, or the
 *                  matrix has a zero dimension and nothing was done
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval PLASMA_ERR_UNALLOCATED NULL sequence or request
 *          \retval other the value returned by plasma_request_fail()
 *
 *******************************************************************************
 *
 * @sa PLASMA_splgsy
 * @sa PLASMA_splgsy_Tile
 * @sa PLASMA_cplgsy_Tile_Async
 * @sa PLASMA_dplgsy_Tile_Async
 *
 ******************************************************************************/
int PLASMA_splgsy_Tile_Async( float bump, PLASMA_desc *A,
                              unsigned long long int seed,
                              PLASMA_sequence *sequence,
                              PLASMA_request  *request)
{
    PLASMA_desc descA;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness; the descriptor is copied only
     * after it has been validated. */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_splgsy_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    descA = *A;

    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_splgsy_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Quick return */
    if (min( descA.m, descA.n ) == 0)
        return PLASMA_SUCCESS;

    plasma_parallel_call_5(plasma_psplgsy,
        float, bump,
        PLASMA_desc, descA,
        unsigned long long int, seed,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    return PLASMA_SUCCESS;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zgetri_Tile - Computes the inverse of a matrix using the LU factorization
 *  computed by PLASMA_zgetrf.
 *  This method inverts U and then computes inv(A) by solving the system
 *  inv(A)*L = inv(U) for inv(A).
 *  Tile equivalent of PLASMA_zgetri().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          On entry, the triangular factor L or U from the
 *          factorization A = P*L*U as computed by PLASMA_zgetrf.
 *          On exit, if return value = 0, the inverse of the original
 *          matrix A.
 *
 * @param[in] IPIV
 *          The pivot indices that define the permutations
 *          as returned by PLASMA_zgetrf.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval <0 the status reported by the workspace allocation when
 *                  it fails (no computation is attempted in that case)
 *          \retval >0 if i, the (i,i) element of the factor U is
 *                exactly zero; The matrix is singular
 *                and its inverse could not be computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri
 * @sa PLASMA_zgetri_Tile_Async
 * @sa PLASMA_cgetri_Tile
 * @sa PLASMA_dgetri_Tile
 * @sa PLASMA_sgetri_Tile
 * @sa PLASMA_zgetrf_Tile
 *
 ******************************************************************************/
int PLASMA_zgetri_Tile(PLASMA_desc *A, int *IPIV)
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descW;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    plasma_sequence_create(plasma, &sequence);

    /* Allocate workspace; without it the computation must not be started
     * and there is nothing to release in descW afterwards. */
    status = PLASMA_Alloc_Workspace_zgetri_Tile_Async(A, &descW);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri_Tile", "workspace allocation failed");
        plasma_sequence_destroy(plasma, sequence);
        return status;
    }

    PLASMA_zgetri_Tile_Async(A, IPIV, &descW, sequence, &request);
    plasma_dynamic_sync();

    /* The workspace is only needed for the duration of the computation */
    plasma_desc_mat_free(&(descW));

    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
/***************************************************************************//** * Main thread control **/ void *plasma_parallel_section(void *plasma_ptr) { plasma_context_t *plasma = (plasma_context_t*)(plasma_ptr); PLASMA_enum action; /* Set thread affinity for the worker */ plasma_setaffinity(plasma->thread_bind[plasma_rank(plasma)]); plasma_barrier(plasma); while(1) { pthread_mutex_lock(&plasma->action_mutex); while ((action = plasma->action) == PLASMA_ACT_STAND_BY) pthread_cond_wait(&plasma->action_condt, &plasma->action_mutex); pthread_mutex_unlock(&plasma->action_mutex); plasma_barrier(plasma); switch (action) { case PLASMA_ACT_PARALLEL: plasma->parallel_func_ptr(plasma); break; case PLASMA_ACT_DYNAMIC: QUARK_Worker_Loop(plasma->quark, plasma_rank(plasma)); break; case PLASMA_ACT_FINALIZE: return NULL; default: plasma_fatal_error("plasma_parallel_section", "undefined action"); return NULL; } plasma_barrier(plasma); } plasma_unsetaffinity(); return NULL; }
/** ****************************************************************************
 *
 * @ingroup InPlaceTransformation
 *
 *  PLASMA_dgecfi converts the matrix A in place from format f_in to
 *  format f_out. Blocking wrapper: it submits PLASMA_dgecfi_Async() on a
 *  private sequence and waits for completion.
 *
 *******************************************************************************
 *
 * @param[in] m
 *         Number of rows of matrix A
 *
 * @param[in] n
 *         Number of columns of matrix A
 *
 * @param[in,out] A
 *         Matrix of size L*m*n
 *
 * @param[in] f_in
 *         Original format of the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] imb
 *         Number of rows of each block in original format
 *
 * @param[in] inb
 *         Number of columns of each block in original format
 *
 * @param[in] f_out
 *         Format requested for the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] omb
 *         Number of rows of each block in requested format
 *
 * @param[in] onb
 *         Number of columns of each block in requested format
 *
 *******************************************************************************
 *
 * @return
 *         \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *         \retval other the final status of the private sequence
 *
 *******************************************************************************
 *
 * @sa PLASMA_dgecfi_Async
 *
 ******************************************************************************/
int PLASMA_dgecfi(int m, int n, double *A,
                  PLASMA_enum f_in, int imb, int inb,
                  PLASMA_enum f_out, int omb, int onb)
{
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma = plasma_context_self();
    int status;

    if (!plasma) {
        plasma_fatal_error(__func__, "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    plasma_sequence_create(plasma, &sequence);

    PLASMA_dgecfi_Async(m, n, A,
                        f_in, imb, inb,
                        f_out, omb, onb,
                        sequence, &request);
    plasma_dynamic_sync();

    /* Capture the outcome before the sequence goes away */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return status;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zgetrf_nopiv_Tile_Async - Computes the tile LU factorization of a
 *  matrix. Non-blocking equivalent of PLASMA_zgetrf_nopiv_Tile(). May return
 *  before the computation is finished. Allows for pipelining of operations at
 *  runtime.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix to be factored.
 *          On exit, the tile factors L and U from the factorization.
 *          The descriptor must use square tiles (nb == mb).
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS the tasks were submitted
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval PLASMA_ERR_UNALLOCATED NULL sequence or request
 *          \retval other the value returned by plasma_request_fail()
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetrf_nopiv
 * @sa PLASMA_zgetrf_nopiv_Tile
 * @sa PLASMA_cgetrf_nopiv_Tile_Async
 * @sa PLASMA_dgetrf_nopiv_Tile_Async
 * @sa PLASMA_sgetrf_nopiv_Tile_Async
 * @sa PLASMA_zgetrs_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zgetrf_nopiv_Tile_Async(PLASMA_desc *A,
                                   PLASMA_sequence *sequence,
                                   PLASMA_request *request)
{
    plasma_context_t *plasma = plasma_context_self();
    PLASMA_desc descA;

    if (!plasma) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (!sequence) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (!request) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* A sequence that already failed rejects any new request */
    if (sequence->status != PLASMA_SUCCESS)
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
    request->status = PLASMA_SUCCESS;

    /* Validate the descriptor first, take a private copy afterwards */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetrf_nopiv_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    descA = *A;

    /* Only square tiles are handled */
    if (descA.mb != descA.nb) {
        plasma_error("PLASMA_zgetrf_nopiv_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    plasma_dynamic_call_3(plasma_pzgetrf_nopiv,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
/***************************************************************************//**
 *  Allocate a tile descriptor holding MT-by-NT tiles of size IB-by-NB, where
 *  NB and IB are the values selected by plasma_tune() for `func`,
 *  MT = ceil(M/NB) and NT = ceil(N/NB). NT is doubled for the GELS and GESVD
 *  functions when the context does not use flat Householder reduction.
 *
 *  @param[in] M      Row dimension used for tuning and for MT.
 *  @param[in] N      Column dimension used for tuning and for NT.
 *  @param[in] func   PLASMA_FUNC_* identifier passed to plasma_tune().
 *  @param[in] type   Element type passed to plasma_desc_init().
 *  @param[out] desc  On success, receives the newly allocated descriptor.
 *                    On allocation or validation failure, everything
 *                    allocated here is released and *desc is set to NULL.
 *
 *  @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval PLASMA_ERR_UNEXPECTED plasma_tune() failed
 *          \retval PLASMA_ERR_OUT_OF_RESOURCES an allocation failed
 *          \retval other the status returned by plasma_desc_check()
 **/
int plasma_alloc_ibnb_tile(int M, int N, PLASMA_enum func, int type, PLASMA_desc **desc)
{
    int status;
    int IB, NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb_tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT & allocate */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        ((func == PLASMA_FUNC_SGELS)  ||
         (func == PLASMA_FUNC_DGELS)  ||
         (func == PLASMA_FUNC_CGELS)  ||
         (func == PLASMA_FUNC_ZGELS)  ||
         (func == PLASMA_FUNC_SGESVD) ||
         (func == PLASMA_FUNC_DGESVD) ||
         (func == PLASMA_FUNC_CGESVD) ||
         (func == PLASMA_FUNC_ZGESVD)))
        NT *= 2;

    /* Allocate and initialize descriptor */
    *desc = (PLASMA_desc*)malloc(sizeof(PLASMA_desc));
    if (*desc == NULL) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    **desc = plasma_desc_init(type, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

    /* Allocate matrix */
    if (plasma_desc_mat_alloc(*desc)) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        /* Do not leak the descriptor when its storage cannot be obtained */
        free(*desc);
        *desc = NULL;
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    /* Check that everything is ok */
    status = plasma_desc_check(*desc);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "invalid descriptor");
        /* Release both the tile storage and the descriptor itself */
        plasma_desc_mat_free(*desc);
        free(*desc);
        *desc = NULL;
        return status;
    }

    return PLASMA_SUCCESS;
}
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive * definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky * factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf. * ******************************************************************************* * * @param[in] uplo * = PlasmaUpper: Upper triangle of A is stored; * = PlasmaLower: Lower triangle of A is stored. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[in] NRHS * The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. * * @param[in] A * The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, * computed by PLASMA_zpotrf. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). * * @param[in,out] B * On entry, the N-by-NRHS right hand side matrix B. * On exit, if return value = 0, the N-by-NRHS solution matrix X. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). 
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zpotrs_Tile * @sa PLASMA_zpotrs_Tile_Async * @sa PLASMA_cpotrs * @sa PLASMA_dpotrs * @sa PLASMA_spotrs * @sa PLASMA_zpotrf * ******************************************************************************/ int PLASMA_zpotrs(PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB) { int NB; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zpotrs", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_zpotrs", "illegal value of uplo"); return -1; } if (N < 0) { plasma_error("PLASMA_zpotrs", "illegal value of N"); return -2; } if (NRHS < 0) { plasma_error("PLASMA_zpotrs", "illegal value of NRHS"); return -3; } if (LDA < max(1, N)) { plasma_error("PLASMA_zpotrs", "illegal value of LDA"); return -5; } if (LDB < max(1, N)) { plasma_error("PLASMA_zpotrs", "illegal value of LDB"); return -7; } /* Quick return */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zpotrs", "plasma_tune() failed"); return status; } /* Set NT & NTRHS */ NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) ); plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, 
plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup float * * PLASMA_splgsy - Generate a random hermitian matrix by tiles. * ******************************************************************************* * * @param[in] bump * The value to add to the diagonal to be sure * to have a positive definite matrix. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[out] A * On exit, The random hermitian matrix A generated. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[in] seed * The seed used in the random generation. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_splgsy_Tile * @sa PLASMA_splgsy_Tile_Async * @sa PLASMA_cplgsy * @sa PLASMA_dplgsy * @sa PLASMA_splgsy * @sa PLASMA_splrnt * @sa PLASMA_splgsy * ******************************************************************************/ int PLASMA_splgsy( float bump, int N, float *A, int LDA, unsigned long long int seed ) { int NB; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_splgsy", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (N < 0) { plasma_error("PLASMA_splgsy", "illegal value of N"); return -2; } if (LDA < max(1, N)) { plasma_error("PLASMA_splgsy", "illegal value of LDA"); return -4; } /* Quick return */ if (max(0, N) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_SGEMM, N, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_splgsy", "plasma_tune() failed"); return status; } /* Set NT */ 
NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); descA = plasma_desc_init( PlasmaRealFloat, NB, NB, NB*NB, LDA, N, 0, 0, N, N); descA.mat = A; /* Call the tile interface */ PLASMA_splgsy_Tile_Async( bump, &descA, seed, sequence, &request ); plasma_siptile2lap( descA, A, NB, NB, LDA, N ); plasma_dynamic_sync(); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//**
 *  Allocate a raw buffer large enough for MT-by-NT tiles of IB-by-NB elements,
 *  where NB and IB are the values selected by plasma_tune() for `func`,
 *  MT = ceil(M/NB) and NT = ceil(N/NB). NT is doubled for the GELS and GESVD
 *  functions when the context does not use flat Householder reduction.
 *
 *  @param[in] M        Row dimension used for tuning and for MT.
 *  @param[in] N        Column dimension used for tuning and for NT.
 *  @param[in] func     PLASMA_FUNC_* identifier passed to plasma_tune().
 *  @param[in] type     Element type; its size comes from plasma_element_size().
 *  @param[out] memptr  Receives the malloc()'ed buffer, or NULL when the
 *                      computed size is zero or the allocation fails.
 *
 *  @return
 *          \retval PLASMA_SUCCESS successful exit (including the empty case)
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval PLASMA_ERR_UNEXPECTED plasma_tune() failed
 *          \retval PLASMA_ERR_OUT_OF_RESOURCES malloc() failed
 **/
int plasma_alloc_ibnb(int M, int N, PLASMA_enum func, int type, void **memptr)
{
    size_t size;
    int status;
    int IB, NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }
    /* Set MT & NT & allocate */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        (func == PLASMA_FUNC_SGELS  ||
         func == PLASMA_FUNC_DGELS  ||
         func == PLASMA_FUNC_CGELS  ||
         func == PLASMA_FUNC_ZGELS  ||
         func == PLASMA_FUNC_SGESVD ||
         func == PLASMA_FUNC_DGESVD ||
         func == PLASMA_FUNC_CGESVD ||
         func == PLASMA_FUNC_ZGESVD ))
        NT *= 2;

    size = (size_t)MT*NT*IB*NB * plasma_element_size(type);
    /* size is unsigned, so "empty" can only mean exactly zero */
    if (size == 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }

    *memptr = malloc(size);
    if (*memptr == NULL) {
        /* Report under this function's own name */
        plasma_error("plasma_alloc_ibnb", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    return PLASMA_SUCCESS;
}
/***************************************************************************//** * * @ingroup Auxiliary * * PLASMA_Dealloc_Handle - Deallocate workspace handle allocated by any workspace allocation routine. * ******************************************************************************* * * @param[in] handle * Workspace handle * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************/ int PLASMA_Dealloc_Handle(void **handle) { plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_Dealloc_Handle", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (*handle == NULL) { plasma_error("PLASMA_Dealloc_Handle", "attempting to deallocate a NULL handle"); return PLASMA_ERR_UNALLOCATED; } free(*handle); *handle = NULL; return PLASMA_SUCCESS; }
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zlansy_Tile - Tile equivalent of PLASMA_zlansy().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] norm
 *          = PlasmaMaxNorm: Max norm
 *          = PlasmaOneNorm: One norm
 *          = PlasmaInfNorm: Infinity norm
 *          = PlasmaFrobeniusNorm: Frobenius norm
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] A
 *          Descriptor of the matrix whose norm is requested. It is forwarded
 *          unchanged to PLASMA_zlansy_Tile_Async().
 *
 *******************************************************************************
 *
 * @return
 *          The value written by PLASMA_zlansy_Tile_Async(). If PLASMA is not
 *          initialized, PLASMA_ERR_NOT_INITIALIZED converted to double is
 *          returned. If the asynchronous call never writes a result, -1.0 is
 *          returned.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlansy
 * @sa PLASMA_zlansy_Tile_Async
 * @sa PLASMA_clansy_Tile
 * @sa PLASMA_dlansy_Tile
 * @sa PLASMA_slansy_Tile
 *
 ******************************************************************************/
double PLASMA_zlansy_Tile(PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc *A)
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    /* Defined fallback in case the asynchronous call fails before storing a
     * result; -1.0 is used as a sentinel for "no norm computed". */
    double value = -1.0;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlansy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    plasma_sequence_create(plasma, &sequence);
    PLASMA_zlansy_Tile_Async(norm, uplo, A, &value, sequence, &request);
    plasma_dynamic_sync();
    plasma_sequence_destroy(plasma, sequence);
    return value;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zlaswp_Tile - performs a series of row interchanges on the matrix A.
 *  One row interchange is initiated for each of rows K1 through K2 of A.
 *  Tile equivalent of PLASMA_zlaswp().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] A
 *          The tile factors L and U from the factorization, computed by PLASMA_zgetrf.
 *
 * @param[in] K1
 *          The first element of IPIV for which a row interchange will
 *          be done.
 *
 * @param[in] K2
 *          The last element of IPIV for which a row interchange will
 *          be done.
 *
 * @param[in] IPIV
 *          The pivot indices from PLASMA_zgetrf.
 *
 * @param[in] INCX
 *          The increment between successive values of IPIV. If INCX
 *          is negative, the pivots are applied in reverse order.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context
 *          \retval other the final status of the private sequence
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlaswp
 * @sa PLASMA_zlaswp_Tile_Async
 * @sa PLASMA_claswp_Tile
 * @sa PLASMA_dlaswp_Tile
 * @sa PLASMA_slaswp_Tile
 * @sa PLASMA_zgetrf_Tile
 *
 ******************************************************************************/
int PLASMA_zlaswp_Tile(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX)
{
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_sequence *sequence = NULL;
    plasma_context_t *plasma = plasma_context_self();
    int status;

    if (!plasma) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Run the asynchronous variant on a private sequence and wait for it */
    plasma_sequence_create(plasma, &sequence);
    PLASMA_zlaswp_Tile_Async(A, K1, K2, IPIV, INCX, sequence, &request);
    plasma_dynamic_sync();

    /* Capture the outcome before the sequence goes away */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return status;
}
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t_Tile
 *
 *  PLASMA_cpotrf_Tile - Computes the Cholesky factorization of a symmetric
 *  positive definite or Hermitian positive definite matrix.
 *  Tile equivalent of PLASMA_cpotrf().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *  Blocking wrapper around PLASMA_cpotrf_Tile_Async().
 *
 *******************************************************************************
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] A
 *          On entry, the symmetric positive definite (or Hermitian) matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular part of
 *          A contains the upper triangular part of the matrix A, and the
 *          strictly lower triangular part of A is not referenced.
 *          If uplo = PlasmaLower, the leading N-by-N lower triangular part of
 *          A contains the lower triangular part of the matrix A, and the
 *          strictly upper triangular part of A is not referenced.
 *          On exit, if return value = 0, the factor U or L from the Cholesky
 *          factorization A = U**H*U or A = L*L**H.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval >0 if i, the leading minor of order i of A is not positive
 *               definite, so the factorization could not be completed, and the
 *               solution has not been computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_cpotrf
 * @sa PLASMA_cpotrf_Tile_Async
 * @sa PLASMA_cpotrf_Tile
 * @sa PLASMA_dpotrf_Tile
 * @sa PLASMA_spotrf_Tile
 * @sa PLASMA_cpotrs_Tile
 *
 ******************************************************************************/
int PLASMA_cpotrf_Tile(PLASMA_enum uplo, PLASMA_desc *A)
{
    /* The context local keeps the name `plasma`: plasma_dynamic_sync() takes no
     * argument, so it presumably reaches the context by that name -- TODO confirm. */
    plasma_context_t *plasma = plasma_context_self();
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_sequence *sequence = NULL;
    int info;

    if (!plasma) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Run the asynchronous factorization on a private sequence, then wait. */
    plasma_sequence_create(plasma, &sequence);
    PLASMA_cpotrf_Tile_Async(uplo, A, sequence, &request);
    plasma_dynamic_sync();

    /* The sequence status is the function result; read it before destruction. */
    info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//**
 *
 * @ingroup float_Tile
 *
 *  PLASMA_splgsy_Tile - Generate a random hermitian matrix by tiles.
 *  Tile equivalent of PLASMA_splgsy().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *  Blocking wrapper around PLASMA_splgsy_Tile_Async().
 *
 *******************************************************************************
 *
 * @param[in] bump
 *          The value to add to the diagonal to be sure
 *          to have a positive definite matrix.
 *
 * @param[out] A
 *          On exit, the random hermitian matrix A generated.
 *
 * @param[in] seed
 *          The seed used in the random generation.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if PLASMA is not initialized
 *          \retval otherwise, the final status of the internal sequence
 *
 *******************************************************************************
 *
 * @sa PLASMA_splgsy
 * @sa PLASMA_splgsy_Tile_Async
 * @sa PLASMA_cplgsy_Tile
 * @sa PLASMA_dplgsy_Tile
 * @sa PLASMA_splgsy_Tile
 * @sa PLASMA_splrnt_Tile
 *
 ******************************************************************************/
int PLASMA_splgsy_Tile( float bump, PLASMA_desc *A,
                        unsigned long long int seed )
{
    /* The context local keeps the name `plasma`: plasma_dynamic_sync() takes no
     * argument, so it presumably reaches the context by that name -- TODO confirm. */
    plasma_context_t *plasma = plasma_context_self();
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_sequence *sequence = NULL;
    int info;

    if (!plasma) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Submit the generator on a private sequence and wait for completion. */
    plasma_sequence_create(plasma, &sequence);
    PLASMA_splgsy_Tile_Async( bump, A, seed, sequence, &request );
    plasma_dynamic_sync();

    /* Read the outcome before the sequence is released. */
    info = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return info;
}
/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Dealloc_Handle_Tile - Deallocate Tile workspace handle allocated by
 *  any tile workspace allocation routine.
 *
 *  Frees both the matrix storage referenced by the descriptor and the
 *  descriptor itself, then resets the caller's pointer to NULL.
 *
 *******************************************************************************
 *
 * @param[in,out] desc
 *          Address of the descriptor handle. On successful exit *desc is NULL.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED if PLASMA is not initialized
 *          \retval PLASMA_ERR_UNALLOCATED if desc, *desc or (*desc)->mat is NULL
 *
 ******************************************************************************/
int PLASMA_Dealloc_Handle_Tile(PLASMA_desc **desc)
{
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Dealloc_Handle_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Guard the handle address itself before looking through it. */
    if (desc == NULL || *desc == NULL) {
        plasma_error("PLASMA_Dealloc_Handle_Tile", "attempting to deallocate a NULL descriptor");
        return PLASMA_ERR_UNALLOCATED;
    }
    if ((*desc)->mat == NULL) {
        plasma_error("PLASMA_Dealloc_Handle_Tile", "attempting to deallocate a NULL pointer");
        return PLASMA_ERR_UNALLOCATED;
    }

    free((*desc)->mat);
    free(*desc);
    /* Prevent the caller from reusing or double-freeing the handle. */
    *desc = NULL;

    return PLASMA_SUCCESS;
}
/***************************************************************************//**
 *
 *  plasma_alloc_ipiv - Allocate an integer pivot array sized for an M-by-N
 *  problem: MT * NT * NB ints, where NB is the tile size selected by
 *  plasma_tune() and MT / NT are the numbers of tile rows / columns.
 *
 * @param[in] M
 *          Number of rows. Non-positive values yield an empty allocation.
 *
 * @param[in] N
 *          Number of columns. Non-positive values yield an empty allocation.
 *
 * @param[in] func
 *          Function identifier forwarded to plasma_tune().
 *
 * @param[out] memptr
 *          Receives the malloc()'ed buffer, or NULL when nothing is allocated.
 *          Left untouched when the context check or plasma_tune() fails.
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit (including the empty case)
 *          \retval PLASMA_ERR_NOT_INITIALIZED if PLASMA is not initialized
 *          \retval PLASMA_ERR_UNEXPECTED if tuning failed or produced NB <= 0
 *          \retval PLASMA_ERR_OUT_OF_RESOURCES if the size overflows or
 *                  malloc() fails
 *
 **/
int plasma_alloc_ipiv(int M, int N, PLASMA_enum func, void **memptr)
{
    /* Largest element count whose byte size still fits in size_t. */
    const size_t max_ints = (size_t)-1 / sizeof(int);
    size_t count;
    int status;
    int NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ipiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ipiv", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Nothing to allocate for an empty problem. Negative dimensions are
     * handled here explicitly: once cast to size_t they would otherwise wrap
     * around to a huge (or accidentally small) byte count. */
    if (M <= 0 || N <= 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }

    NB = PLASMA_NB;
    if (NB <= 0) {
        /* Would divide by zero below. */
        plasma_error("plasma_alloc_ipiv", "invalid tile size");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT (ceiling divisions) */
    NT = (N%NB==0) ? (N/NB) : ((N/NB)+1);
    MT = (M%NB==0) ? (M/NB) : ((M/NB)+1);

    /* count = MT * NT * NB, refusing any product that would overflow */
    count = (size_t)MT;
    if (count > max_ints / (size_t)NT) {
        *memptr = NULL;
        plasma_error("plasma_alloc_ipiv", "requested size is too large");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    count *= (size_t)NT;
    if (count > max_ints / (size_t)NB) {
        *memptr = NULL;
        plasma_error("plasma_alloc_ipiv", "requested size is too large");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    count *= (size_t)NB;

    *memptr = malloc(count * sizeof(int));
    if (*memptr == NULL) {
        plasma_error("plasma_alloc_ipiv", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    return PLASMA_SUCCESS;
}
/***************************************************************************//** * * @ingroup float_Tile_Async * * PLASMA_ssytrd_Tile_Async - Computes all eigenvalues and, * optionally, eigenvectors of a complex Hermitian matrix A using a * two-stage approach: * First stage: reduction to band tridiagonal form; * Second stage: reduction from band to tridiagonal form. * * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_ssytrd * @sa PLASMA_ssytrd_Tile * @sa PLASMA_chetrd_Tile_Async * @sa PLASMA_dsytrd_Tile_Async * @sa PLASMA_ssytrd_Tile_Async * ******************************************************************************/ int PLASMA_ssytrd_Tile_Async(PLASMA_enum jobz, PLASMA_enum uplo, PLASMA_desc *A, float *D, float *E, PLASMA_desc *T, PLASMA_desc *Q, PLASMA_sequence *sequence, PLASMA_request *request) { int NB, IB, IBNB, NT; PLASMA_desc descA = *A; PLASMA_desc descT = *T; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Set NT & NTRHS */ NB = 
PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (descA.ln%NB==0) ? (descA.ln/NB) : (descA.ln/NB+1); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (plasma_desc_check(&descT) != PLASMA_SUCCESS) { plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if ( (jobz == PlasmaVec) && (plasma_desc_check(Q) != PLASMA_SUCCESS) ) { plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (jobz != PlasmaNoVec && jobz != PlasmaVec) { plasma_error("PLASMA_ssytrd_Tile_Async", "illegal value of jobz"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (descA.m != descA.n) { plasma_error("PLASMA_ssytrd_Tile_Async", "matrix need to be square"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (descA.nb != descA.mb) { plasma_error("PLASMA_ssytrd_Tile_Async", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (jobz == PlasmaVec) { plasma_error("PLASMA_ssytrd_Tile_Async", "computing the eigenvectors is not supported in this version"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if ( (jobz == PlasmaVec) && (Q->nb != Q->mb) ) { plasma_error("PLASMA_ssytrd_Tile_Async", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Reduction to tridiagonal form * with a two-stage approach. 
*/ /* Reduction to BAND tridiagonal form */ plasma_dynamic_call_5(plasma_pssyrbt, PLASMA_enum, uplo, PLASMA_desc, descA, PLASMA_desc, descT, PLASMA_sequence*, sequence, PLASMA_request*, request); /* * Build the Q of the first stage */ /* if (jobz == PlasmaVec){ */ /* /\* Initialize Q to Identity *\/ */ /* plasma_dynamic_call_6(plasma_pslaset, */ /* PLASMA_enum, PlasmaUpperLower, */ /* float, 0.0, */ /* float, 1.0, */ /* PLASMA_desc, descQ, */ /* PLASMA_sequence*, sequence, */ /* PLASMA_request*, request); */ /* /\* Accumulate the transformations from the first stage*\/ */ /* plasma_dynamic_call_6(plasma_psorgtr, */ /* PLASMA_enum, uplo, */ /* PLASMA_desc, descA, */ /* PLASMA_desc, descQ, */ /* PLASMA_desc, descT, */ /* PLASMA_sequence*, sequence, */ /* PLASMA_request*, request); */ /* } */ /* Set the V's to zero before the 2nd stage (bulge chasing) */ /* */ plasma_dynamic_call_5(plasma_pslaset2, PLASMA_enum, uplo, float, 0.0, PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) : plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb), PLASMA_sequence*, sequence, PLASMA_request*, request); /* Reduction from BAND tridiagonal to the final condensed form */ plasma_dynamic_call_7(plasma_pssbrdt, PLASMA_enum, uplo, PLASMA_desc, descA, float*, D, float*, E, PLASMA_desc, descT, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/** ****************************************************************************
 *
 * @ingroup InPlaceTransformation
 *
 *  PLASMA_dgecfi_Async converts the matrix A in place from format f_in to
 *  format f_out.
 *
 *******************************************************************************
 *
 * @param[in] m
 *         Number of rows of matrix A
 *
 * @param[in] n
 *         Number of columns of matrix A
 *
 * @param[in,out] A
 *         Matrix of size L*m*n
 *
 * @param[in] f_in
 *         Original format of the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] imb
 *         Number of rows of each block in original format
 *
 * @param[in] inb
 *         Number of columns of each block in original format
 *
 * @param[in] f_out
 *         Format requested for the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] omb
 *         Number of rows of each block in requested format
 *
 * @param[in] onb
 *         Number of columns of each block in requested format
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS on the quick-return path
 *          \retval PLASMA_ERR_NOT_INITIALIZED if PLASMA is not initialized
 *          \retval -4 if f_in (4th argument) is not a known format
 *          \retval -7 if f_out (7th argument) is not a known format
 *
 *******************************************************************************
 *
 * @sa PLASMA_dgecfi
 *
 ******************************************************************************/
int PLASMA_dgecfi_Async(int m, int n, double *A,
                        PLASMA_enum f_in, int imb, int inb,
                        PLASMA_enum f_out, int omb, int onb,
                        PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    double *W = NULL;
    int im1, in1, om1, on1;
    size_t A11, A21, A12, A22;

    /* Check Plasma context */
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error(__func__, "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check arguments */
    if( ( f_in != PlasmaCM ) &&
        ( f_in != PlasmaRM ) &&
        ( f_in != PlasmaCCRB ) &&
        ( f_in != PlasmaRRRB ) &&
        ( f_in != PlasmaCRRB ) &&
        ( f_in != PlasmaRCRB ) )
    {
        plasma_error(__func__, "Input format unknown");
        return -4;
    }
    /* NOTE(review): this branch validates f_out, yet it reports
     * "Input format unknown" -- the message looks copy-pasted from above. */
    if( ( f_out != PlasmaCM ) &&
        ( f_out != PlasmaRM ) &&
        ( f_out != PlasmaCCRB ) &&
        ( f_out != PlasmaRRRB ) &&
        ( f_out != PlasmaCRRB ) &&
        ( f_out != PlasmaRCRB ) )
    {
        plasma_error(__func__, "Input format unknown");
        return -7;
    }

    /* quick return: same CM or RM format on both sides with identical block
     * sizes, so there is nothing to convert */
    if( (f_in == f_out) && ( (f_in == PlasmaCM) || (f_in == PlasmaRM))
        && (imb == omb) && ( inb == onb ) ) {
        return PLASMA_SUCCESS;
    }

    /* Make the block sizes of both sides agree whenever one side is a plain
     * CM/RM layout: a CM/RM input adopts the output block sizes (or PLASMA_NB
     * on both sides when the output is CM/RM too); a CM/RM output adopts the
     * input block sizes. */
    if ( (f_in == PlasmaCM) || (f_in == PlasmaRM) )
    {
        if ( (f_out == PlasmaCM) || (f_out == PlasmaRM) ){
            imb = omb = PLASMA_NB;
            inb = onb = PLASMA_NB;
        } else {
            imb = omb;
            inb = onb;
        }
    }
    else if ( (f_out == PlasmaCM) || (f_out == PlasmaRM) )
    {
        omb = imb;
        onb = inb;
    }

    /* calculate number of full blocks: each value is the largest multiple of
     * the block size that fits in the corresponding dimension.
     * NOTE(review): block sizes are not checked for zero before dividing. */
    im1 = (m / imb) * imb;
    in1 = (n / inb) * inb;
    om1 = (m / omb) * omb;
    on1 = (n / onb) * onb;

    /* separate the four submatrices A11, A12, A21, A22 */
    if( f_in == PlasmaCM ) {
        /* Packing is only needed when the last block row is partial. */
        if( om1 < m ) {
            /* First on1 columns (the full block columns). */
            plasma_static_call_6(plasma_pdpack,
                                 int, m,
                                 int, on1,
                                 double*, A,
                                 int, (m-om1),
                                 PLASMA_sequence*, sequence,
                                 PLASMA_request*, request);
            if ( on1 < n) {
                /* Remaining n-on1 columns, starting at column on1. */
                plasma_static_call_6(plasma_pdpack,
                                     int, m,
                                     int, (n-on1),
                                     double*, &(A[m*on1]),
                                     int, (m-om1),
                                     PLASMA_sequence*, sequence,
                                     PLASMA_request*, request);
            }
        }
/* NOTE(review): the definition appears to be cut off here -- the enclosing
 * `if( f_in == PlasmaCM )` block and the function body are not closed before
 * the next routine starts. */
/***************************************************************************//** * * @ingroup Auxiliary * * PLASMA_Finalize - Finalize PLASMA. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************/ int PLASMA_Finalize() { int core; int status; void *exitcodep; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_Finalize()", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Terminate the dynamic scheduler */ plasma_dynamic_sync(); /* Free quark structures */ QUARK_Free(plasma->quark); /* Set termination action */ pthread_mutex_lock(&plasma->action_mutex); plasma->action = PLASMA_ACT_FINALIZE; pthread_mutex_unlock(&plasma->action_mutex); pthread_cond_broadcast(&plasma->action_condt); /* Barrier and clear action */ plasma_barrier(plasma); plasma->action = PLASMA_ACT_STAND_BY; // Join threads for (core = 1; core < plasma->world_size; core++) { status = pthread_join(plasma->thread_id[core], &exitcodep); if (status != 0) { plasma_fatal_error("PLASMA_Finalize", "pthread_join() failed"); return status; } } plasma_barrier_finalize(plasma); plasma_barrier_bw_finalize(plasma); /* Unbind main thread */ plasma_unsetaffinity(); /* Destroy thread attributes */ status = pthread_attr_destroy(&plasma->thread_attr); if (status != 0) plasma_fatal_error("PLASMA_Finalize", "pthread_attr_destroy() failed"); /* Destroy topology */ plasma_topology_finalize(); status = plasma_context_remove(plasma, pthread_self()); if (status != PLASMA_SUCCESS) { plasma_fatal_error("PLASMA_Finalize", "plasma_context_remove() failed"); return status; } /* Restore the concurency */ /* actually it's really bad, we shoulde set the concurrency only * if it's not already done and restore it only we had change it */ pthread_setconcurrency( 0 ); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup Auxiliary * * PLASMA_Init_Affinity - Initialize PLASMA. * ******************************************************************************* * * @param[in] cores * Number of cores to use (threads to launch). * If cores = 0, cores = PLASMA_NUM_THREADS if it is set, the * system number of core otherwise. * * @param[in] coresbind * Array to specify where to bind each thread. * Each thread i is binded to coresbind[hwloc(i)] if hwloc is * provided, or to coresbind[i] otherwise. * If coresbind = NULL, coresbind = PLASMA_AFF_THREADS if it * is set, the identity function otherwise. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************/ int PLASMA_Init_Affinity(int cores, int *coresbind) { plasma_context_t *plasma; int status; int core; /* Create context and insert in the context map */ plasma = plasma_context_create(); if (plasma == NULL) { plasma_fatal_error("PLASMA_Init", "plasma_context_create() failed"); return PLASMA_ERR_OUT_OF_RESOURCES; } status = plasma_context_insert(plasma, pthread_self()); if (status != PLASMA_SUCCESS) { plasma_fatal_error("PLASMA_Init", "plasma_context_insert() failed"); return PLASMA_ERR_OUT_OF_RESOURCES; } /* Init number of cores and topology */ plasma_topology_init(); /* Set number of cores */ if ( cores < 1 ) { plasma->world_size = plasma_get_numthreads(); if ( plasma->world_size == -1 ) { plasma->world_size = 1; plasma_warning("PLASMA_Init", "Could not find the number of cores: the thread number is set to 1"); } } else plasma->world_size = cores; if (plasma->world_size <= 0) { plasma_fatal_error("PLASMA_Init", "failed to get system size"); return PLASMA_ERR_NOT_FOUND; } /* Check if not more cores than the hard limit */ if (plasma->world_size > CONTEXT_THREADS_MAX) { 
plasma_fatal_error("PLASMA_Init", "not supporting so many cores"); return PLASMA_ERR_INTERNAL_LIMIT; } /* Get the size of each NUMA node */ plasma->group_size = plasma_get_numthreads_numa(); while ( ((plasma->world_size)%(plasma->group_size)) != 0 ) (plasma->group_size)--; /* Initialize barriers */ plasma_barrier_init(plasma); plasma_barrier_bw_init(plasma); /* Initialize default thread attributes */ status = pthread_attr_init(&plasma->thread_attr); if (status != 0) { plasma_fatal_error("PLASMA_Init", "pthread_attr_init() failed"); return status; } /* Set scope to system */ status = pthread_attr_setscope(&plasma->thread_attr, PTHREAD_SCOPE_SYSTEM); if (status != 0) { plasma_fatal_error("PLASMA_Init", "pthread_attr_setscope() failed"); return status; } /* Set concurrency */ status = pthread_setconcurrency(plasma->world_size); if (status != 0) { plasma_fatal_error("PLASMA_Init", "pthread_setconcurrency() failed"); return status; } /* Launch threads */ memset(plasma->thread_id, 0, CONTEXT_THREADS_MAX*sizeof(pthread_t)); if (coresbind != NULL) { memcpy(plasma->thread_bind, coresbind, plasma->world_size*sizeof(int)); } else { plasma_get_affthreads(plasma->thread_bind); } /* Assign rank and thread ID for the master */ plasma->thread_rank[0] = 0; plasma->thread_id[0] = pthread_self(); for (core = 1; core < plasma->world_size; core++) { plasma->thread_rank[core] = core; pthread_create( &plasma->thread_id[core], &plasma->thread_attr, plasma_parallel_section, (void*)plasma); } /* Ensure BLAS are sequential and set thread affinity for the master */ #if defined(PLASMA_WITH_MKL) #if defined(__ICC) || defined(__INTEL_COMPILER) kmp_set_defaults("KMP_AFFINITY=disabled"); #endif #endif /* Initialize the dynamic scheduler */ plasma->quark = QUARK_Setup(plasma->world_size); plasma_barrier(plasma); plasma_setlapack_sequential(plasma); return PLASMA_SUCCESS; }
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t
 *
 *  PLASMA_csyrk - Performs one of the hermitian rank k operations
 *
 *    \f[ C = \alpha [ op( A ) \times conjg( op( A )' )] + \beta C \f],
 *
 *  where op( X ) is one of
 *
 *    op( X ) = X  or op( X ) = conjg( X' )
 *
 *  where alpha and beta are real scalars, C is an n-by-n hermitian
 *  matrix and A is an n-by-k matrix in the first case and a k-by-n
 *  matrix in the second case.
 *
 *  NOTE(review): the text above reads as if inherited from the Hermitian
 *  variant. This routine takes alpha and beta as PLASMA_Complex32_t and
 *  accepts only PlasmaNoTrans / PlasmaTrans -- confirm the intended wording.
 *
 *******************************************************************************
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of C is stored;
 *          = PlasmaLower: Lower triangle of C is stored.
 *
 * @param[in] trans
 *          Specifies whether the matrix A is transposed or conjugate transposed:
 *          = PlasmaNoTrans: A is not transposed;
 *          = PlasmaTrans  : A is transposed.
 *
 * @param[in] N
 *          N specifies the order of the matrix C. N must be at least zero.
 *
 * @param[in] K
 *          K specifies the number of columns of the matrix op( A ).
 *          K must be at least zero.
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha.
 *
 * @param[in] A
 *          A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans,
 *          and is N otherwise.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. When trans = PlasmaNoTrans,
 *          LDA must be at least max( 1, N ), otherwise LDA must be at least
 *          max( 1, K ).
 *
 * @param[in] beta
 *          beta specifies the scalar beta
 *
 * @param[in,out] C
 *          C is a LDC-by-N matrix.
 *          On exit, the array uplo part of the matrix is overwritten
 *          by the uplo part of the updated matrix.
 *
 * @param[in] LDC
 *          The leading dimension of the array C. LDC >= max( 1, N ).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_csyrk_Tile
 * @sa PLASMA_csyrk
 * @sa PLASMA_dsyrk
 * @sa PLASMA_ssyrk
 *
 ******************************************************************************/
int PLASMA_csyrk(PLASMA_enum uplo, PLASMA_enum trans, int N, int K,
                 PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
                 PLASMA_Complex32_t beta,  PLASMA_Complex32_t *C, int LDC)
{
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_csyrk", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; each failure returns minus the position of the
     * offending argument */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_csyrk", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        plasma_error("PLASMA_csyrk", "illegal value of trans");
        return -2;
    }
    /* Am x An is the shape of A as stored */
    if ( trans == PlasmaNoTrans ) {
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_csyrk", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_csyrk", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_csyrk", "illegal value of LDA");
        return -7;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_csyrk", "illegal value of LDC");
        return -10;
    }

    /* Quick return: nothing to do for an empty C, or when the update would
     * leave C unchanged (alpha == 0 or K == 0, together with beta == 1).
     * NOTE(review): K is an int but is compared with the floating constant
     * 0.0; the result is the same as K == 0. */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex32_t)0.0 || K == 0.0) && beta == (PLASMA_Complex32_t)1.0))
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_csyrk", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Out-of-place translation: build tile copies of A and C. The last
     * argument of each call lists the descriptors to free; presumably it is
     * the clean-up run by the macro on failure -- TODO confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
    } else {
/* NOTE(review): the definition appears to be cut off here -- the `else`
 * branch and the rest of the function are not present before the next
 * routine starts. */
/***************************************************************************//**
 *
 * @ingroup float
 *
 *  PLASMA_sgemm - Performs one of the matrix-matrix operations
 *
 *    \f[ C = \alpha [op( A )\times op( B )] + \beta C \f],
 *
 *  where op( X ) is one of
 *
 *    op( X ) = X  or op( X ) = X'
 *
 *  alpha and beta are scalars, and A, B and C are matrices, with op( A )
 *  an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
 *
 *******************************************************************************
 *
 * @param[in] transA
 *          Specifies whether the matrix A is transposed or not transposed:
 *          = PlasmaNoTrans: A is not transposed;
 *          = PlasmaTrans:   A is transposed.
 *
 * @param[in] transB
 *          Specifies whether the matrix B is transposed or not transposed:
 *          = PlasmaNoTrans: B is not transposed;
 *          = PlasmaTrans:   B is transposed.
 *
 * @param[in] M
 *          M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0.
 *
 * @param[in] N
 *          N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0.
 *
 * @param[in] K
 *          K specifies the number of columns of the matrix op( A ) and the number of rows of
 *          the matrix op( B ). K >= 0.
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha
 *
 * @param[in] A
 *          A is a LDA-by-ka matrix, where ka is K when  transA = PlasmaNoTrans,
 *          and is  M  otherwise.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,M) when
 *          transA = PlasmaNoTrans, LDA >= max(1,K) otherwise.
 *
 * @param[in] B
 *          B is a LDB-by-kb matrix, where kb is N when  transB = PlasmaNoTrans,
 *          and is  K  otherwise.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,K) when
 *          transB = PlasmaNoTrans, LDB >= max(1,N) otherwise.
 *
 * @param[in] beta
 *          beta specifies the scalar beta
 *
 * @param[in,out] C
 *          C is a LDC-by-N matrix.
 *          On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C )
 *
 * @param[in] LDC
 *          The leading dimension of the array C. LDC >= max(1,M).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_sgemm_Tile
 * @sa PLASMA_cgemm
 * @sa PLASMA_dgemm
 * @sa PLASMA_sgemm
 *
 ******************************************************************************/
int PLASMA_sgemm(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
                 float alpha, float *A, int LDA,
                                           float *B, int LDB,
                 float beta,  float *C, int LDC)
{
    int NB;
    int Am, An, Bm, Bn;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_sgemm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments; each failure returns minus the position of the
     * offending argument.
     * NOTE(review): PlasmaTrans is tested twice in each of the two checks
     * below; the second comparison is redundant. */
    if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaTrans)) {
        plasma_error("PLASMA_sgemm", "illegal value of transA");
        return -1;
    }
    if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaTrans)) {
        plasma_error("PLASMA_sgemm", "illegal value of transB");
        return -2;
    }
    /* Am x An and Bm x Bn are the shapes of A and B as stored */
    if ( transA == PlasmaNoTrans ) {
        Am = M; An = K;
    } else {
        Am = K; An = M;
    }
    if ( transB == PlasmaNoTrans ) {
        Bm = K; Bn = N;
    } else {
        Bm = N; Bn = K;
    }
    if (M < 0) {
        plasma_error("PLASMA_sgemm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_sgemm", "illegal value of N");
        return -4;
    }
    /* NOTE(review): this branch rejects K, but the message names N. */
    if (K < 0) {
        plasma_error("PLASMA_sgemm", "illegal value of N");
        return -5;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_sgemm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, Bm)) {
        plasma_error("PLASMA_sgemm", "illegal value of LDB");
        return -10;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_sgemm", "illegal value of LDC");
        return -13;
    }

    /* Quick return: nothing to do for an empty C, or when the update would
     * leave C unchanged (alpha == 0 or K == 0, together with beta == 1) */
    if (M == 0 || N == 0 ||
        ((alpha == (float)0.0 || K == 0) && beta == (float)1.0))
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_SGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_sgemm", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    /* Out-of-place translation: build tile copies of A, B and C. The last
     * argument of each call lists the descriptors to free; presumably it is
     * the clean-up run by the macro on failure -- TODO confirm. */
    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_sooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_sooplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_sooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
/* NOTE(review): the definition appears to be cut off here -- the `else`
 * branch and the rest of the function are not present before the next
 * routine starts. */
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zgetri - Computes the inverse of a matrix using the LU factorization
 *  computed by PLASMA_zgetrf.
 *  This method inverts U and then computes inv(A) by solving the system
 *  inv(A)*L = inv(U) for inv(A).
 *
 *******************************************************************************
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the triangular factor L or U from the
 *          factorization A = P*L*U as computed by PLASMA_zgetrf.
 *          On exit, if return value = 0, the inverse of the original
 *          matrix A.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in] IPIV
 *          The pivot indices that define the permutations
 *          as returned by PLASMA_zgetrf.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *          \retval >0 if i, the (i,i) element of the factor U is
 *                exactly zero; The matrix is singular
 *                and its inverse could not be computed.
 *          \retval the status of the workspace allocation if it failed; A is
 *                then converted to tiles and back without being inverted.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri_Tile
 * @sa PLASMA_zgetri_Tile_Async
 * @sa PLASMA_cgetri
 * @sa PLASMA_dgetri
 * @sa PLASMA_sgetri
 * @sa PLASMA_zgetrf
 *
 ******************************************************************************/
int PLASMA_zgetri(int N,
                  PLASMA_Complex64_t *A, int LDA,
                  int *IPIV)
{
    int NB;
    int status;
    int wstatus;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    PLASMA_desc descW;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zgetri", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgetri", "illegal value of LDA");
        return -3;
    }
    /* Quick return (N < 0 was rejected above) */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request,
                             plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
                            sequence, &request);
    }

    /* Allocate workspace. The translation of A has already been submitted, so
     * a failure here must not return early: the computation is skipped, but A
     * is still translated back and every resource released below. */
    wstatus = PLASMA_Alloc_Workspace_zgetri_Tile_Async(&descA, &descW);
    if (wstatus == PLASMA_SUCCESS) {
        /* Call the tile interface */
        PLASMA_zgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);
    } else {
        plasma_error("PLASMA_zgetri", "PLASMA_Alloc_Workspace_zgetri_Tile_Async() failed");
    }

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);
        plasma_dynamic_sync();
    }

    if (wstatus == PLASMA_SUCCESS) {
        plasma_desc_mat_free(&(descW));
        status = sequence->status;
    } else {
        /* No workspace to free; report why nothing was computed. */
        status = wstatus;
    }

    plasma_sequence_destroy(plasma, sequence);
    return status;
}
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t_Tile_Async * * PLASMA_zgetri_Tile_Async - Computes the inverse of a matrix using the LU * factorization computed by PLASMA_zgetrf. * This method inverts U and then computes inv(A) by solving the system * inv(A)*L = inv(U) for inv(A). * Non-blocking equivalent of PLASMA_zgetri_Tile(). * May return before the computation is finished. * Allows for pipelining of operations at runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_zgetri * @sa PLASMA_zgetri_Tile * @sa PLASMA_cgetri_Tile_Async * @sa PLASMA_dgetri_Tile_Async * @sa PLASMA_sgetri_Tile_Async * @sa PLASMA_zgetrf_Tile_Async * ******************************************************************************/ int PLASMA_zgetri_Tile_Async(PLASMA_desc *A, int *IPIV, PLASMA_desc *W, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA; PLASMA_desc descW; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgetri_Tile_Async", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if 
(plasma_desc_check(A) != PLASMA_SUCCESS) { plasma_error("PLASMA_zgetri_Tile_Async", "invalid A descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } else { descA = *A; } /* Check descriptors for correctness */ if (plasma_desc_check(W) != PLASMA_SUCCESS) { plasma_error("PLASMA_zgetri_Tile_Async", "invalid W descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } else { descW = *W; } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_zgetri_Tile_Async", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Quick return */ if (max(descA.m, 0) == 0) return PLASMA_SUCCESS; plasma_dynamic_call_5(plasma_pztrtri, PLASMA_enum, PlasmaUpper, PLASMA_enum, PlasmaNonUnit, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); plasma_dynamic_call_9(plasma_pztrsmrv, PLASMA_enum, PlasmaRight, PLASMA_enum, PlasmaLower, PLASMA_enum, PlasmaNoTrans, PLASMA_enum, PlasmaUnit, PLASMA_Complex64_t, (PLASMA_Complex64_t) 1.0, PLASMA_desc, descA, PLASMA_desc, descW, PLASMA_sequence*, sequence, PLASMA_request*, request); /* No need for barrier tile2row because of previous dependencies */ /* swap */ plasma_dynamic_call_5( plasma_pzlaswpc, PLASMA_desc, descA, int *, IPIV, int, -1, PLASMA_sequence*, sequence, PLASMA_request*, request); plasma_dynamic_call_3( plasma_pzbarrier_row2tl, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup PLASMA_Complex32_t * * PLASMA_cgelqs - Compute a minimum-norm solution min || A*X - B || using the LQ factorization * A = L*Q computed by PLASMA_cgelqf. * ******************************************************************************* * * @param[in] M * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= M >= 0. * * @param[in] NRHS * The number of columns of B. NRHS >= 0. * * @param[in] A * Details of the LQ factorization of the original matrix A as returned by PLASMA_cgelqf. * * @param[in] LDA * The leading dimension of the array A. LDA >= M. * * @param[in] T * Auxiliary factorization data, computed by PLASMA_cgelqf. * * @param[in,out] B * On entry, the M-by-NRHS right hand side matrix B. * On exit, the N-by-NRHS solution matrix X. * * @param[in] LDB * The leading dimension of the array B. LDB >= N. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_cgelqs_Tile * @sa PLASMA_cgelqs_Tile_Async * @sa PLASMA_cgelqs * @sa PLASMA_dgelqs * @sa PLASMA_sgelqs * @sa PLASMA_cgelqf * ******************************************************************************/ int PLASMA_cgelqs(int M, int N, int NRHS, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *T, PLASMA_Complex32_t *B, int LDB) { int NB, IB, IBNB, MT, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB, descT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_cgelqs", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (M < 0) { plasma_error("PLASMA_cgelqs", 
"illegal value of M"); return -1; } if (N < 0 || M > N) { plasma_error("PLASMA_cgelqs", "illegal value of N"); return -2; } if (NRHS < 0) { plasma_error("PLASMA_cgelqs", "illegal value of N"); return -3; } if (LDA < max(1, M)) { plasma_error("PLASMA_cgelqs", "illegal value of LDA"); return -5; } if (LDB < max(1, max(1, N))) { plasma_error("PLASMA_cgelqs", "illegal value of LDB"); return -8; } /* Quick return */ if (min(M, min(N, NRHS)) == 0) { return PLASMA_SUCCESS; } /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ status = plasma_tune(PLASMA_FUNC_CGELS, M, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_cgelqs", "plasma_tune() failed"); return status; } /* Set MT, NT & NTRHS */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); plasma_sequence_create(plasma, &sequence); if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { descT = plasma_desc_init( PlasmaComplexFloat, IB, NB, IBNB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); } else { /* Double the size of T to accomodate the tree reduction phase */ descT = plasma_desc_init( PlasmaComplexFloat, IB, NB, IBNB, MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB); } descT.mat = T; if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) ); plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zgesvd - computes the singular value decomposition (SVD) of a complex
 *  M-by-N matrix A, optionally computing the left and/or right singular
 *  vectors. The SVD is written
 *
 *       A = U * SIGMA * conjugate-transpose(V)
 *
 *  where SIGMA is an M-by-N matrix which is zero except for its
 *  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
 *  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
 *  are the singular values of A; they are real and non-negative, and
 *  are returned in descending order.  The first min(m,n) columns of
 *  U and V are the left and right singular vectors of A.
 *
 *  Note that the routine returns V**H, not V.
 *  Not LAPACK Compliant for now!
 *  Note: Only PlasmaNoVec supported!
 *******************************************************************************
 *
 * @param[in] jobu
 *          Specifies options for computing all or part of the matrix U.
 *          Intended usage:
 *          = PlasmaVec: all M columns of U are returned in array U;
 *          = PlasmaNoVec: no columns of U (no left singular vectors) are
 *                     computed.
 *          Note: Only PlasmaNoVec supported!
 *
 * @param[in] jobvt
 *          Specifies options for computing all or part of the matrix V**H.
 *          Intended usage:
 *          = PlasmaVec: all N rows of V**H are returned in array VT;
 *          = PlasmaNoVec: no rows of V**H (no right singular vectors) are
 *                     computed.
 *          Note: Only PlasmaNoVec supported!
 *
 * @param[in] M
 *          The number of rows of the matrix A. M >= 0.
 *
 * @param[in] N
 *          The number of columns of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix A.
 *          On exit,
 *          if JOBU = 'O',  A is overwritten with the first min(m,n)
 *                          columns of U (the left singular vectors,
 *                          stored columnwise);
 *          if JOBVT = 'O', A is overwritten with the first min(m,n)
 *                          rows of V**H (the right singular vectors,
 *                          stored rowwise);
 *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
 *                          are destroyed.
* * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[out] S * The double precision singular values of A, sorted so that S(i) >= S(i+1). * * @param[out] U * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. * If JOBU = 'A', U contains the M-by-M unitary matrix U; * if JOBU = 'S', U contains the first min(m,n) columns of U * (the left singular vectors, stored columnwise); * if JOBU = 'N' or 'O', U is not referenced. * * @param[in] LDU * The leading dimension of the array U. LDU >= 1; if * JOBU = 'S' or 'A', LDU >= M. * * @param[out] VT * If JOBVT = 'A', VT contains the N-by-N unitary matrix * V**H; * if JOBVT = 'S', VT contains the first min(m,n) rows of * V**H (the right singular vectors, stored rowwise); * if JOBVT = 'N' or 'O', VT is not referenced. * * @param[in] LDVT * The leading dimension of the array VT. LDVT >= 1; if * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). * * @param[in, out] descT * On entry, descriptor as return by PLASMA_Alloc_Workspace_zgesvd * On exit, contains auxiliary factorization data. 
* * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zgesvd_Tile * @sa PLASMA_zgesvd_Tile_Async * @sa PLASMA_cgesvd * @sa PLASMA_dgesvd * @sa PLASMA_sgesvd * ******************************************************************************/ int PLASMA_zgesvd(PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex64_t *A, int LDA, double *S, PLASMA_Complex64_t *U, int LDU, PLASMA_Complex64_t *VT, int LDVT, PLASMA_desc *descT) { int NB, IB, IBNB, minMN, MT, NT, minMTNT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descU, descVT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgesvd", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on M & N; Set NBNB */ status = plasma_tune(PLASMA_FUNC_ZGESVD, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zgesvd", "plasma_tune() failed"); return status; } /* Set MT, NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? 
(N/NB) : (N/NB+1); minMN = min(M,N); minMTNT = min(MT,NT); /* Check input arguments */ if (jobu != PlasmaNoVec && jobu !=PlasmaVec) { plasma_error("PLASMA_zgesvd", "illegal value of jobu"); return -1; } if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) { plasma_error("PLASMA_zgesvd", "illegal value of jobvt"); return -2; } if (M < 0) { plasma_error("PLASMA_zgesvd", "illegal value of M"); return -3; } if (N < 0) { plasma_error("PLASMA_zgesvd", "illegal value of N"); return -4; } if (LDA < max(1, M)) { plasma_error("PLASMA_zgesvd", "illegal value of LDA"); return -6; } if (LDU < 1) { plasma_error("PLASMA_zgesvd", "illegal value of LDU"); return -9; } if (LDVT < 1) { plasma_error("PLASMA_zgesvd", "illegal value of LDVT"); return -11; } if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || ( descT->m != MT*IB ) || (descT->n != NT*NB) ) { plasma_error("PLASMA_zgesvd", "invalid T descriptor"); return -12; } /* Quick return */ if (min(M, N) == 0) { return PLASMA_SUCCESS; } if (jobu == PlasmaVec) { plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version"); return -1; } if (jobvt == PlasmaVec) { plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version"); return -2; } plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) ); if (jobu == PlasmaVec){ plasma_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU))); }
/***************************************************************************//**
 *
 * @ingroup double
 *
 *  PLASMA_dtrmm - Computes B = alpha*op( A )*B or B = alpha*B*op( A ).
 *
 *******************************************************************************
 *
 * @param[in] side
 *          Specifies whether A appears on the left or on the right of B:
 *          = PlasmaLeft:  B = alpha*op( A )*B
 *          = PlasmaRight: B = alpha*B*op( A )
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or lower triangular:
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] transA
 *          Specifies whether the matrix A is transposed, not transposed or conjugate transposed:
 *          = PlasmaNoTrans: A is not transposed;
 *          = PlasmaTrans:   A is transposed (for a real matrix this is also
 *                           the conjugate transpose).
 *
 * @param[in] diag
 *          Specifies whether or not A is unit triangular:
 *          = PlasmaNonUnit: A is non unit;
 *          = PlasmaUnit:    A is unit.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in] NRHS
 *          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha.
 *
 * @param[in] A
 *          The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular
 *          part of the array A contains the upper triangular matrix, and the strictly lower
 *          triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N
 *          lower triangular part of the array A contains the lower triangular matrix, and the
 *          strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the
 *          diagonal elements of A are also not referenced and are assumed to be 1.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in,out] B
 *          On entry, the N-by-NRHS matrix B.
 *          On exit, if return value = 0, B is overwritten by the N-by-NRHS
 *          result alpha*op( A )*B or alpha*B*op( A ).
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,N).
* ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_dtrmm_Tile * @sa PLASMA_dtrmm_Tile_Async * @sa PLASMA_ctrmm * @sa PLASMA_dtrmm * @sa PLASMA_strmm * ******************************************************************************/ int PLASMA_dtrmm(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N, int NRHS, double alpha, double *A, int LDA, double *B, int LDB) { int NB, NA; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_dtrmm", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (side != PlasmaLeft && side != PlasmaRight) { plasma_error("PLASMA_dtrmm", "illegal value of side"); return -1; } if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_dtrmm", "illegal value of uplo"); return -2; } if (transA != PlasmaTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) { plasma_error("PLASMA_dtrmm", "illegal value of transA"); return -3; } if (diag != PlasmaUnit && diag != PlasmaNonUnit) { plasma_error("PLASMA_dtrmm", "illegal value of diag"); return -4; } if (N < 0) { plasma_error("PLASMA_dtrmm", "illegal value of N"); return -5; } if (NRHS < 0) { plasma_error("PLASMA_dtrmm", "illegal value of NRHS"); return -6; } if (LDA < max(1, N)) { plasma_error("PLASMA_dtrmm", "illegal value of LDA"); return -8; } if (LDB < max(1, N)) { plasma_error("PLASMA_dtrmm", "illegal value of LDB"); return -10; } /* Quick return */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS); if 
(status != PLASMA_SUCCESS) { plasma_error("PLASMA_dtrmm", "plasma_tune() failed"); return status; } /* Set NT & NTRHS */ NB = PLASMA_NB; if (side == PlasmaLeft) { NA = N; } else { NA = NRHS; } plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_dooplap2tile( descA, A, NB, NB, LDA, NA, 0, 0, NA, NA, plasma_desc_mat_free(&(descA)) ); plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {