/** **************************************************************************** * * @ingroup InPlaceTransformation * * PLASMA_dgecfi convert the matrice A in place from format f_in to * format f_out * ******************************************************************************* * * @param[in] m * Number of rows of matrix A * * @param[in] n * Number of columns of matrix A * * @param[in,out] A * Matrix of size L*m*n * * @param[in] f_in * Original format of the matrix A. Must be part of (PlasmaCM, PlasmaRM, * PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB) * * @param[in] imb * Number of rows of each block in original format * * @param[in] inb * Number of columns of each block in original format * * @param[in] f_out * Format requested for the matrix A. Must be part of (PlasmaCM, PlasmaRM, * PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB) * * @param[in] omb * Number of rows of each block in requested format * * @param[in] onb * Number of columns of each block in requested format * ******************************************************************************* * * @sa PLASMA_dgecfi_Async * ******************************************************************************/ int PLASMA_dgecfi(int m, int n, double *A, PLASMA_enum f_in, int imb, int inb, PLASMA_enum f_out, int omb, int onb) { plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; int status; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error(__func__, "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } plasma_sequence_create(plasma, &sequence); PLASMA_dgecfi_Async( m, n, A, f_in, imb, inb, f_out, omb, onb, sequence, &request); plasma_dynamic_sync(); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//** * **/ void plasma_pdaxpy_quark(double alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int X, Y; int m, n; int ldam, ldbm; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); for (m = 0; m < A.mt; m++) { X = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldam = BLKLDD(A, m); ldbm = BLKLDD(B, m); for (n = 0; n < A.nt; n++) { Y = n == A.nt-1 ? A.n-n*A.nb : A.nb; QUARK_CORE_daxpy( plasma->quark, &task_flags, X, Y, A.mb, alpha, A(m, n), ldam, B(m, n), ldbm); } } }
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t_Tile * * PLASMA_zgetri_Tile - Computes the inverse of a matrix using the LU factorization * computed by PLASMA_zgetrf. * This method inverts U and then computes inv(A) by solving the system * inv(A)*L = inv(U) for inv(A). * Tile equivalent of PLASMA_zgetri(). * Operates on matrices stored by tiles. * All matrices are passed through descriptors. * All dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in,out] A * On entry, the triangular factor L or U from the * factorization A = P*L*U as computed by PLASMA_zgetrf. * On exit, if return value = 0, the inverse of the original * matrix A. * * @param[in] IPIV * The pivot indices that define the permutations * as returned by PLASMA_zgetrf. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval >0 if i, the (i,i) element of the factor U is * exactly zero; The matrix is singular * and its inverse could not be computed. * ******************************************************************************* * * @sa PLASMA_zgetri * @sa PLASMA_zgetri_Tile_Async * @sa PLASMA_cgetri_Tile * @sa PLASMA_dgetri_Tile * @sa PLASMA_sgetri_Tile * @sa PLASMA_zgetrf_Tile * ******************************************************************************/ int PLASMA_zgetri_Tile(PLASMA_desc *A, int *IPIV) { plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descW; int status; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgetri_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } plasma_sequence_create(plasma, &sequence); /* Allocate workspace */ PLASMA_Alloc_Workspace_zgetri_Tile_Async(A, &descW); PLASMA_zgetri_Tile_Async(A, IPIV, &descW, sequence, &request); plasma_dynamic_sync(); plasma_desc_mat_free(&(descW)); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//** * Parallel tile Cholesky factorization - dynamic scheduling **/ void plasma_pdplrnt_quark( PLASMA_desc A, unsigned long long int seed, PLASMA_sequence *sequence, PLASMA_request *request ) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int m, n; int ldam; int tempmm, tempnn; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); for (m = 0; m < A.mt; m++) { tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldam = BLKLDD(A, m); for (n = 0; n < A.nt; n++) { tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb; QUARK_CORE_dplrnt( plasma->quark, &task_flags, tempmm, tempnn, A(m, n), ldam, A.m, m*A.mb, n*A.nb, seed ); } } }
/***************************************************************************//** * **/ void plasma_pslag2d_quark(PLASMA_desc SA, PLASMA_desc B, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int X, Y; int m, n; int ldam, ldbm; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); for(m = 0; m < SA.mt; m++) { X = m == SA.mt-1 ? SA.m-m*SA.mb : SA.mb; ldam = BLKLDD(SA, m); ldbm = BLKLDD(B, m); for(n = 0; n < SA.nt; n++) { Y = n == SA.nt-1 ? SA.n-n*SA.nb : SA.nb; QUARK_CORE_slag2d( plasma->quark, &task_flags, X, Y, SA.mb, SA(m, n), ldam, B(m, n), ldbm); } } }
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t_Tile_Async * * PLASMA_zlaswp_Tile_Async - performs a series of row interchanges * on the matrix A. One row interchange is initiated for each of * rows K1 through K2 of A. * Non-blocking equivalent of PLASMA_zlaswp_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_zlaswp * @sa PLASMA_zlaswp_Tile * @sa PLASMA_claswp_Tile_Async * @sa PLASMA_dlaswp_Tile_Async * @sa PLASMA_slaswp_Tile_Async * @sa PLASMA_zgetrf_Tile_Async * ******************************************************************************/ int PLASMA_zlaswp_Tile_Async(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_zlaswp_Tile", "invalid first descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if ( (K1 != 1) || (K2 != descA.m) ) { plasma_error("PLASMA_zlaswp_Tile", "invalid K1 or K2 (1..M is the only interval supported right now)"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } plasma_dynamic_call_3( plasma_pzbarrier_tl2pnl, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); /* swap */ plasma_dynamic_call_5( plasma_pzlaswp, PLASMA_desc, descA, int *, IPIV, int, INCX, PLASMA_sequence*, sequence, PLASMA_request*, request); plasma_dynamic_call_3( plasma_pzbarrier_pnl2tl, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup double_Tile_Async * * PLASMA_dsygst_Tile_Async - reduces a complex Hermitian-definite * generalized eigenproblem to standard form. * If PlasmaItype == 1, the problem is A*x = lambda*B*x, and A is * overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) * If PlasmaItype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x * = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must * have been previously factorized as U**T*U or L*L**T by * PLASMA_DPOTRF. * ONLY PlasmaItype == 1 and PlasmaLower supported! * Non-blocking equivalent of PLASMA_dsygst_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_dsygst * @sa PLASMA_dsygst_Tile * @sa PLASMA_chegst_Tile_Async * @sa PLASMA_dsygst_Tile_Async * @sa PLASMA_ssygst_Tile_Async * @sa PLASMA_dsygv_Tile_Async * ******************************************************************************/ int PLASMA_dsygst_Tile_Async(PLASMA_enum itype, PLASMA_enum uplo, PLASMA_desc *A, PLASMA_desc *B, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; PLASMA_desc descB = *B; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_dsygst_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_dsygst_Tile", "invalid first descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (plasma_desc_check(&descB) != PLASMA_SUCCESS) { plasma_error("PLASMA_dsygst_Tile", "invalid second descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_dsygst_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* * Transform Hermitian-definite generalized eigenproblem * to standard form */ plasma_dynamic_call_6(plasma_pdsygst, PLASMA_enum, itype, PLASMA_enum, uplo, PLASMA_desc, descA, PLASMA_desc, descB, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//** * **/ int plasma_alloc_ibnb_tile(int M, int N, PLASMA_enum func, int type, PLASMA_desc **desc) { int status; int IB, NB, MT, NT; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("plasma_alloc_ibnb_tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on M & N; Set IBNBSIZE */ status = plasma_tune(func, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("plasma_alloc_ibnb_tile", "plasma_tune() failed"); return PLASMA_ERR_UNEXPECTED; } /* Set MT & NT & allocate */ NB = PLASMA_NB; IB = PLASMA_IB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); /* Size is doubled for RH QR to store the reduction T */ if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) && ((func == PLASMA_FUNC_SGELS) || (func == PLASMA_FUNC_DGELS) || (func == PLASMA_FUNC_CGELS) || (func == PLASMA_FUNC_ZGELS) || (func == PLASMA_FUNC_SGESVD) || (func == PLASMA_FUNC_DGESVD) || (func == PLASMA_FUNC_CGESVD) || (func == PLASMA_FUNC_ZGESVD))) NT *= 2; /* Allocate and initialize descriptor */ *desc = (PLASMA_desc*)malloc(sizeof(PLASMA_desc)); if (*desc == NULL) { plasma_error("plasma_alloc_ibnb_tile", "malloc() failed"); return PLASMA_ERR_OUT_OF_RESOURCES; } **desc = plasma_desc_init(type, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); /* Allocate matrix */ if (plasma_desc_mat_alloc(*desc)) { plasma_error("plasma_alloc_ibnb_tile", "malloc() failed"); return PLASMA_ERR_OUT_OF_RESOURCES; } /* Check that everything is ok */ status = plasma_desc_check(*desc); if (status != PLASMA_SUCCESS) { plasma_error("plasma_alloc_ibnb_tile", "invalid descriptor"); return status; } return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive * definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky * factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf. * ******************************************************************************* * * @param[in] uplo * = PlasmaUpper: Upper triangle of A is stored; * = PlasmaLower: Lower triangle of A is stored. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[in] NRHS * The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. * * @param[in] A * The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, * computed by PLASMA_zpotrf. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). * * @param[in,out] B * On entry, the N-by-NRHS right hand side matrix B. * On exit, if return value = 0, the N-by-NRHS solution matrix X. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zpotrs_Tile * @sa PLASMA_zpotrs_Tile_Async * @sa PLASMA_cpotrs * @sa PLASMA_dpotrs * @sa PLASMA_spotrs * @sa PLASMA_zpotrf * ******************************************************************************/ int PLASMA_zpotrs(PLASMA_enum uplo, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB) { int NB; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zpotrs", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_zpotrs", "illegal value of uplo"); return -1; } if (N < 0) { plasma_error("PLASMA_zpotrs", "illegal value of N"); return -2; } if (NRHS < 0) { plasma_error("PLASMA_zpotrs", "illegal value of NRHS"); return -3; } if (LDA < max(1, N)) { plasma_error("PLASMA_zpotrs", "illegal value of LDA"); return -5; } if (LDB < max(1, N)) { plasma_error("PLASMA_zpotrs", "illegal value of LDB"); return -7; } /* Quick return */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zpotrs", "plasma_tune() failed"); return status; } /* Set NT & NTRHS */ NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) ); plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup float * * PLASMA_sgesv - Computes the solution to a system of linear equations A * X = B, * where A is an N-by-N matrix and X and B are N-by-NRHS matrices. * The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. * The factored form of A is then used to solve the system of equations A * X = B. * ******************************************************************************* * * @param[in] N * The number of linear equations, i.e., the order of the matrix A. N >= 0. * * @param[in] NRHS * The number of right hand sides, i.e., the number of columns of the matrix B. * NRHS >= 0. * * @param[in,out] A * On entry, the N-by-N coefficient matrix A. * On exit, the tile L and U factors from the factorization. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). * * @param[out] IPIV * On exit, the pivot indices that define the permutations. * * @param[in,out] B * On entry, the N-by-NRHS matrix of right hand side matrix B. * On exit, if return value = 0, the N-by-NRHS solution matrix X. * * @param[in] LDB * The leading dimension of the array B. LDB >= max(1,N). * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, so the solution could not be computed. * ******************************************************************************* * * @sa PLASMA_sgesv_Tile * @sa PLASMA_sgesv_Tile_Async * @sa PLASMA_cgesv * @sa PLASMA_dgesv * @sa PLASMA_sgesv * ******************************************************************************/ int PLASMA_sgesv(int N, int NRHS, float *A, int LDA, int *IPIV, float *B, int LDB) { int NB, IB, IBNB, NT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descB; plasma = plasma_context_self(); if (plasma == NULL) { plasma_error("PLASMA_sgesv", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (N < 0) { plasma_error("PLASMA_sgesv", "illegal value of N"); return -1; } if (NRHS < 0) { plasma_error("PLASMA_sgesv", "illegal value of NRHS"); return -2; } if (LDA < max(1, N)) { plasma_error("PLASMA_sgesv", "illegal value of LDA"); return -4; } if (LDB < max(1, N)) { plasma_error("PLASMA_sgesv", "illegal value of LDB"); return -8; } /* Quick return */ if (min(N, NRHS) == 0) return PLASMA_SUCCESS; /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_SGESV, N, N, NRHS); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_sgesv", "plasma_tune() failed"); return status; } /* Set NT & NTRHS */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; NT = (N%NB==0) ? (N/NB) : (N/NB+1); plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) ); plasma_sooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB))); } else {
/***************************************************************************//** * * @ingroup float * * PLASMA_splgsy - Generate a random hermitian matrix by tiles. * ******************************************************************************* * * @param[in] bump * The value to add to the diagonal to be sure * to have a positive definite matrix. * * @param[in] N * The order of the matrix A. N >= 0. * * @param[out] A * On exit, The random hermitian matrix A generated. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[in] seed * The seed used in the random generation. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_splgsy_Tile * @sa PLASMA_splgsy_Tile_Async * @sa PLASMA_cplgsy * @sa PLASMA_dplgsy * @sa PLASMA_splgsy * @sa PLASMA_splrnt * @sa PLASMA_splgsy * ******************************************************************************/ int PLASMA_splgsy( float bump, int N, float *A, int LDA, unsigned long long int seed ) { int NB; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_splgsy", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Check input arguments */ if (N < 0) { plasma_error("PLASMA_splgsy", "illegal value of N"); return -2; } if (LDA < max(1, N)) { plasma_error("PLASMA_splgsy", "illegal value of LDA"); return -4; } /* Quick return */ if (max(0, N) == 0) return PLASMA_SUCCESS; /* Tune NB depending on M, N & NRHS; Set NBNB */ status = plasma_tune(PLASMA_FUNC_SGEMM, N, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_splgsy", "plasma_tune() failed"); return status; } /* Set NT */ NB = PLASMA_NB; plasma_sequence_create(plasma, &sequence); descA = plasma_desc_init( PlasmaRealFloat, NB, NB, NB*NB, LDA, N, 0, 0, N, N); descA.mat = A; /* Call the tile interface */ PLASMA_splgsy_Tile_Async( bump, &descA, seed, sequence, &request ); plasma_siptile2lap( descA, A, NB, NB, LDA, N ); plasma_dynamic_sync(); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//** * Parallel construction of Q using tile V (application to identity) - dynamic scheduling **/ void plasma_pdorgqr_quark(PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int k, m, n; int ldak, ldqk, ldam, ldqm; int tempmm, tempnn, tempkmin, tempkm; int tempAkm, tempAkn; int ib; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); ib = PLASMA_IB; for (k = min(A.mt, A.nt)-1; k >= 0; k--) { tempAkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempAkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempkmin = min( tempAkn, tempAkm ); tempkm = k == Q.mt-1 ? Q.m-k*Q.mb : Q.mb; ldak = BLKLDD(A, k); ldqk = BLKLDD(Q, k); for (m = Q.mt - 1; m > k; m--) { tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb; ldam = BLKLDD(A, m); ldqm = BLKLDD(Q, m); for (n = 0; n < Q.nt; n++) { tempnn = n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb; QUARK_CORE_dtsmqr( plasma->quark, &task_flags, PlasmaLeft, PlasmaNoTrans, Q.mb, tempnn, tempmm, tempnn, tempAkn, ib, T.nb, Q(k, n), ldqk, Q(m, n), ldqm, A(m, k), ldam, T(m, k), T.mb); } } for (n = 0; n < Q.nt; n++) { tempnn = n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb; QUARK_CORE_dormqr( plasma->quark, &task_flags, PlasmaLeft, PlasmaNoTrans, tempkm, tempnn, tempkmin, ib, T.nb, A(k, k), ldak, T(k, k), T.mb, Q(k, n), ldqk); } } }
/***************************************************************************//** * * @ingroup float_Tile_Async * * PLASMA_splgsy_Tile_Async - Generate a random hermitian matrix by tiles. * Non-blocking equivalent of PLASMA_splgsy_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_splgsy * @sa PLASMA_splgsy_Tile * @sa PLASMA_cplgsy_Tile_Async * @sa PLASMA_dplgsy_Tile_Async * @sa PLASMA_splgsy_Tile_Async * @sa PLASMA_splgsy_Tile_Async * @sa PLASMA_splgsy_Tile_Async * ******************************************************************************/ int PLASMA_splgsy_Tile_Async( float bump, PLASMA_desc *A, unsigned long long int seed, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_splgsy_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_splgsy_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_splgsy_Tile", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_splgsy_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Quick return */ if (min( descA.m, descA.n ) == 0) return PLASMA_SUCCESS; plasma_parallel_call_5(plasma_psplgsy, float, bump, PLASMA_desc, descA, unsigned long long int, seed, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup PLASMA_Complex32_t_Tile_Async * * PLASMA_cpotrf_Tile_Async - Computes the Cholesky factorization of a symmetric * positive definite or Hermitian positive definite matrix. * Non-blocking equivalent of PLASMA_cpotrf_Tile(). * May return before the computation is finished. * Allows for pipelining of operations ar runtime. * ******************************************************************************* * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_cpotrf * @sa PLASMA_cpotrf_Tile * @sa PLASMA_cpotrf_Tile_Async * @sa PLASMA_dpotrf_Tile_Async * @sa PLASMA_spotrf_Tile_Async * @sa PLASMA_cpotrs_Tile_Async * ******************************************************************************/ int PLASMA_cpotrf_Tile_Async(PLASMA_enum uplo, PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA = *A; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(&descA) != PLASMA_SUCCESS) { plasma_error("PLASMA_cpotrf_Tile", "invalid descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_cpotrf_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } if (uplo != PlasmaUpper && uplo != PlasmaLower) { plasma_error("PLASMA_cpotrf_Tile", "illegal value of uplo"); return plasma_request_fail(sequence, request, -1); } /* Quick return */ /* if (max(N, 0) == 0) return PLASMA_SUCCESS; */ plasma_parallel_call_4(plasma_pcpotrf, PLASMA_enum, uplo, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//** * **/ int plasma_alloc_ibnb(int M, int N, PLASMA_enum func, int type, void **memptr) { size_t size; int status; int IB, NB, MT, NT; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("plasma_alloc_ibnb", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on M & N; Set IBNBSIZE */ status = plasma_tune(func, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("plasma_alloc_ibnb", "plasma_tune() failed"); return PLASMA_ERR_UNEXPECTED; } /* Set MT & NT & allocate */ NB = PLASMA_NB; IB = PLASMA_IB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); /* Size is doubled for RH QR to store the reduction T */ if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) && (func == PLASMA_FUNC_SGELS || func == PLASMA_FUNC_DGELS || func == PLASMA_FUNC_CGELS || func == PLASMA_FUNC_ZGELS || func == PLASMA_FUNC_SGESVD || func == PLASMA_FUNC_DGESVD || func == PLASMA_FUNC_CGESVD || func == PLASMA_FUNC_ZGESVD )) NT *= 2; size = (size_t)MT*NT*IB*NB * plasma_element_size(type); if (size <= 0) { *memptr = NULL; return PLASMA_SUCCESS; } // status = posix_memalign(memptr, STANDARD_PAGE_SIZE, size); *memptr = malloc(size); // if (status != 0) { if (*memptr == NULL) { plasma_error("plasma_alloc_ibnb_tile", "malloc() failed"); return PLASMA_ERR_OUT_OF_RESOURCES; } return PLASMA_SUCCESS; }
/***************************************************************************//** * Parallel forward substitution for tile LU - dynamic scheduling **/ void plasma_pztrsmpl_quark(PLASMA_desc A, PLASMA_desc B, PLASMA_desc L, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int k, m, n; int ldak, ldam, ldbk, ldbm; int tempkm, tempnn, tempkmin, tempmm, tempkn; int ib; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); ib = PLASMA_IB; for (k = 0; k < min(A.mt, A.nt); k++) { tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempkmin = k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.mb : A.mb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); for (n = 0; n < B.nt; n++) { tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb; QUARK_CORE_zgessm( plasma->quark, &task_flags, tempkm, tempnn, tempkmin, ib, L.nb, IPIV(k, k), A(k, k), ldak, B(k, n), ldbk); } for (m = k+1; m < A.mt; m++) { tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldam = BLKLDD(A, m); ldbm = BLKLDD(B, m); for (n = 0; n < B.nt; n++) { tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb; QUARK_CORE_zssssm( plasma->quark, &task_flags, A.nb, tempnn, tempmm, tempnn, tempkn, ib, L.nb, B(k, n), ldbk, B(m, n), ldbm, L(m, k), L.mb, A(m, k), ldam, IPIV(m, k)); } } } }
/***************************************************************************//** * Parallel tile row interchanges - dynamic scheduling **/ void plasma_pclaswp_quark(PLASMA_desc B, int *IPIV, int inc, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int m, n; int tempi, tempm, tempmm, tempnn; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); if ( inc > 0 ) { for (m = 0; m < B.mt; m++) { tempi = m * B.mb; tempm = B.m - tempi; tempmm = m == B.mt-1 ? tempm : B.mb; for (n = 0; n < B.nt; n++) { tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb; QUARK_CORE_claswp_ontile( plasma->quark, &task_flags, plasma_desc_submatrix(B, tempi, n*B.nb, tempm, tempnn), B(m, n), 1, tempmm, IPIV(m), inc, B(B.mt-1, n) ); } } } else { for (m = B.mt-1; m > -1; m--) { tempi = m * B.mb; tempm = B.m - tempi; tempmm = m == B.mt-1 ? tempm : B.mb; for (n = 0; n < B.nt; n++) { tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb; QUARK_CORE_claswp_ontile( plasma->quark, &task_flags, plasma_desc_submatrix(B, tempi, n*B.nb, tempm, tempnn), B(m, n), 1, tempmm, IPIV(m), inc, B(0, n) ); } } } }
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t_Tile_Async * * PLASMA_zgetrf_nopiv_Tile_Async - Computes the tile LU factorization of a * matrix. Non-blocking equivalent of PLASMA_zgetrf_nopiv_Tile(). May return * before the computation is finished. Allows for pipelining of operations ar * runtime. * ******************************************************************************* * * @param[in,out] A * On entry, the M-by-N matrix to be factored. * On exit, the tile factors L and U from the factorization. * * @param[in] sequence * Identifies the sequence of function calls that this call belongs to * (for completion checks and exception handling purposes). * * @param[out] request * Identifies this function call (for exception handling purposes). * ******************************************************************************* * * @sa PLASMA_zgetrf_nopiv * @sa PLASMA_zgetrf_nopiv_Tile * @sa PLASMA_cgetrf_nopiv_Tile_Async * @sa PLASMA_dgetrf_nopiv_Tile_Async * @sa PLASMA_sgetrf_nopiv_Tile_Async * @sa PLASMA_zgetrs_Tile_Async * ******************************************************************************/ int PLASMA_zgetrf_nopiv_Tile_Async(PLASMA_desc *A, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_desc descA; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (sequence == NULL) { plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL sequence"); return PLASMA_ERR_UNALLOCATED; } if (request == NULL) { plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL request"); return PLASMA_ERR_UNALLOCATED; } /* Check sequence status */ if (sequence->status == PLASMA_SUCCESS) request->status = PLASMA_SUCCESS; else return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ if (plasma_desc_check(A) != PLASMA_SUCCESS) { plasma_error("PLASMA_zgetrf_nopiv_Tile", "invalid first descriptor"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } else { descA = *A; } /* Check input arguments */ if (descA.nb != descA.mb) { plasma_error("PLASMA_zgetrf_nopiv_Tile", "only square tiles supported"); return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); } plasma_dynamic_call_3(plasma_pzgetrf_nopiv, PLASMA_desc, descA, PLASMA_sequence*, sequence, PLASMA_request*, request); return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup Auxiliary * * PLASMA_Dealloc_Handle - Deallocate workspace handle allocated by any workspace allocation routine. * ******************************************************************************* * * @param[in] handle * Workspace handle * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************/ int PLASMA_Dealloc_Handle(void **handle) { plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_Dealloc_Handle", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (*handle == NULL) { plasma_error("PLASMA_Dealloc_Handle", "attempting to deallocate a NULL handle"); return PLASMA_ERR_UNALLOCATED; } free(*handle); *handle = NULL; return PLASMA_SUCCESS; }
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t_Tile * * PLASMA_zlansy_Tile - Tile equivalent of PLASMA_zlansy(). * Operates on matrices stored by tiles. * All matrices are passed through descriptors. * All dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in] norm * = PlasmaMaxNorm: Max norm * = PlasmaOneNorm: One norm * = PlasmaInfNorm: Infinity norm * = PlasmaFrobeniusNorm: Frobenius norm * * @param[in] uplo * = PlasmaUpper: Upper triangle of A is stored; * = PlasmaLower: Lower triangle of A is stored. * * @param[in] A * On entry, the triangular factor U or L. * On exit, if UPLO = 'U', the upper triangle of A is * overwritten with the upper triangle of the product U * U'; * if UPLO = 'L', the lower triangle of A is overwritten with * the lower triangle of the product L' * L. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************* * * @sa PLASMA_zlansy * @sa PLASMA_zlansy_Tile_Async * @sa PLASMA_clansy_Tile * @sa PLASMA_dlansy_Tile * @sa PLASMA_slansy_Tile * ******************************************************************************/ double PLASMA_zlansy_Tile(PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc *A) { plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; double value; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zlansy_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } plasma_sequence_create(plasma, &sequence); PLASMA_zlansy_Tile_Async(norm, uplo, A, &value, sequence, &request); plasma_dynamic_sync(); plasma_sequence_destroy(plasma, sequence); return value; }
/***************************************************************************//** * * @ingroup PLASMA_Complex32_t_Tile * * PLASMA_cpotrf_Tile - Computes the Cholesky factorization of a symmetric positive definite * or Hermitian positive definite matrix. * Tile equivalent of PLASMA_cpotrf(). * Operates on matrices stored by tiles. * All matrices are passed through descriptors. * All dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in] uplo * = PlasmaUpper: Upper triangle of A is stored; * = PlasmaLower: Lower triangle of A is stored. * * @param[in] A * On entry, the symmetric positive definite (or Hermitian) matrix A. * If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A * contains the upper triangular part of the matrix A, and the strictly lower triangular * part of A is not referenced. * If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization * A = U**H*U or A = L*L**H. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval >0 if i, the leading minor of order i of A is not positive definite, so the * factorization could not be completed, and the solution has not been computed. * ******************************************************************************* * * @sa PLASMA_cpotrf * @sa PLASMA_cpotrf_Tile_Async * @sa PLASMA_cpotrf_Tile * @sa PLASMA_dpotrf_Tile * @sa PLASMA_spotrf_Tile * @sa PLASMA_cpotrs_Tile * ******************************************************************************/ int PLASMA_cpotrf_Tile(PLASMA_enum uplo, PLASMA_desc *A) { plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; int status; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } plasma_sequence_create(plasma, &sequence); PLASMA_cpotrf_Tile_Async(uplo, A, sequence, &request); plasma_dynamic_sync(); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t_Tile * * PLASMA_zlaswp_Tile - performs a series of row interchanges on the matrix A. * One row interchange is initiated for each of rows K1 through K2 of A. * Tile equivalent of PLASMA_zlaswp(). * Operates on matrices stored by tiles. * All matrices are passed through descriptors. * All dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in] A * The tile factors L and U from the factorization, computed by PLASMA_zgetrf. * * @param[in] K1 * The first element of IPIV for which a row interchange will * be done. * * @param[in] K2 * The last element of IPIV for which a row interchange will * be done. * * @param[in] IPIV * The pivot indices from PLASMA_zgetrf. * * @param[in] INCX * The increment between successive values of IPIV. If IPIV * is negative, the pivots are applied in reverse order. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************* * * @sa PLASMA_zlaswp * @sa PLASMA_zlaswp_Tile_Async * @sa PLASMA_claswp_Tile * @sa PLASMA_dlaswp_Tile * @sa PLASMA_slaswp_Tile * @sa PLASMA_zgetrf_Tile * ******************************************************************************/ int PLASMA_zlaswp_Tile(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX) { plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; int status; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } plasma_sequence_create(plasma, &sequence); PLASMA_zlaswp_Tile_Async(A, K1, K2, IPIV, INCX, sequence, &request); plasma_dynamic_sync(); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//** * * @ingroup float_Tile * * PLASMA_splgsy_Tile - Generate a random hermitian matrix by tiles. * Tile equivalent of PLASMA_splgsy(). * Operates on matrices stored by tiles. * All matrices are passed through descriptors. * All dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in] bump * The value to add to the diagonal to be sure * to have a positive definite matrix. * * @param[in] A * On exit, The random hermitian matrix A generated. * * @param[in] seed * The seed used in the random generation. * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************* * * @sa PLASMA_splgsy * @sa PLASMA_splgsy_Tile_Async * @sa PLASMA_cplgsy_Tile * @sa PLASMA_dplgsy_Tile * @sa PLASMA_splgsy_Tile * @sa PLASMA_splrnt_Tile * @sa PLASMA_splgsy_Tile * ******************************************************************************/ int PLASMA_splgsy_Tile( float bump, PLASMA_desc *A, unsigned long long int seed ) { plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; int status; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } plasma_sequence_create(plasma, &sequence); PLASMA_splgsy_Tile_Async( bump, A, seed, sequence, &request ); plasma_dynamic_sync(); status = sequence->status; plasma_sequence_destroy(plasma, sequence); return status; }
/***************************************************************************//** * * @ingroup Auxiliary * * PLASMA_Dealloc_Handle_Tile - Deallocate Tile workspace handle allocated by any tile workspace allocation routine. * ******************************************************************************* * * @param[in] desc * Descriptot handle * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * ******************************************************************************/ int PLASMA_Dealloc_Handle_Tile(PLASMA_desc **desc) { plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_Dealloc_Handle_Tile", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } if (*desc == NULL) { plasma_error("PLASMA_Dealloc_Handle_Tile", "attempting to deallocate a NULL descriptor"); return PLASMA_ERR_UNALLOCATED; } if ((*desc)->mat == NULL) { plasma_error("PLASMA_Dealloc_Handle_Tile", "attempting to deallocate a NULL pointer"); return PLASMA_ERR_UNALLOCATED; } free((*desc)->mat); free(*desc); *desc = NULL; return PLASMA_SUCCESS; }
/***************************************************************************//** * **/ int plasma_alloc_ipiv(int M, int N, PLASMA_enum func, void **memptr) { size_t size; int status; int NB, MT, NT; plasma_context_t *plasma; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("plasma_alloc_ipiv", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on M & N; Set IBNBSIZE */ status = plasma_tune(func, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("plasma_alloc_ipiv", "plasma_tune() failed"); return PLASMA_ERR_UNEXPECTED; } /* Set MT & NT & allocate */ NB = PLASMA_NB; NT = (N%NB==0) ? (N/NB) : ((N/NB)+1); MT = (M%NB==0) ? (M/NB) : ((M/NB)+1); size = (size_t)MT*NT * NB * sizeof(int); if (size <= 0) { *memptr = NULL; return PLASMA_SUCCESS; } // status = posix_memalign(memptr, CACHE_LINE_SIZE, size); *memptr = malloc(size); // if (status != 0) { if (*memptr == NULL) { plasma_error("plasma_alloc_ipiv", "malloc() failed"); return PLASMA_ERR_OUT_OF_RESOURCES; } return PLASMA_SUCCESS; }
/***************************************************************************//** * Zeroes a submatrix in tile layout - dynamic scheduling **/ void plasma_pztile_zero_quark(PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_Complex64_t *bdl; plasma_context_t *plasma; int X1, Y1; int X2, Y2; int n, m, ldt; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); for (m = 0; m < A.mt; m++) { ldt = BLKLDD(A, m); for (n = 0; n < A.nt; n++) { X1 = n == 0 ? A.j%A.nb : 0; Y1 = m == 0 ? A.i%A.mb : 0; X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb; bdl = ABDL(m, n); QUARK_Insert_Task(plasma->quark, CORE_ztile_zero_quark, &task_flags, sizeof(int), &X1, VALUE, sizeof(int), &X2, VALUE, sizeof(int), &Y1, VALUE, sizeof(int), &Y2, VALUE, sizeof(PLASMA_Complex64_t)*A.bsiz, bdl, OUTPUT | LOCALITY, sizeof(int), &ldt, VALUE, 0); } } }
/***************************************************************************//** * Conversion from LAPACK F77 matrix layout to tile layout - dynamic scheduling **/ void plasma_pzlapack_to_tile_quark(PLASMA_Complex64_t *Af77, int lda, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request) { PLASMA_Complex64_t *f77; PLASMA_Complex64_t *bdl; plasma_context_t *plasma; int X1, Y1; int X2, Y2; int n, m, ldt; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); for (m = 0; m < A.mt; m++) { ldt = BLKLDD(A, m); for (n = 0; n < A.nt; n++) { X1 = n == 0 ? A.j%A.nb : 0; Y1 = m == 0 ? A.i%A.mb : 0; X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb; f77 = AF77(m, n); bdl = ABDL(m, n); QUARK_CORE_zlacpy( plasma->quark, &task_flags, PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb, &(f77[X1*lda+Y1]), lda, &(bdl[X1*lda+Y1]), ldt); } } }
/***************************************************************************//** * Parallel tile matrix-matrix multiplication - dynamic scheduling **/ void plasma_pzgemm_quark(PLASMA_enum transA, PLASMA_enum transB, PLASMA_Complex64_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_Complex64_t beta, PLASMA_desc C, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int m, n, k; int ldam, ldak, ldbn, ldbk, ldcm; int tempmm, tempnn, tempkn, tempkm; PLASMA_Complex64_t zbeta; PLASMA_Complex64_t zone = (PLASMA_Complex64_t)1.0; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); for (m = 0; m < C.mt; m++) { tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb; ldcm = BLKLDD(C, m); for (n = 0; n < C.nt; n++) { tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb; /* * A: PlasmaNoTrans / B: PlasmaNoTrans */ if (transA == PlasmaNoTrans) { ldam = BLKLDD(A, m); if (transB == PlasmaNoTrans) { for (k = 0; k < A.nt; k++) { tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; ldbk = BLKLDD(B, k); zbeta = k == 0 ? beta : zone; QUARK_CORE_zgemm( plasma->quark, &task_flags, transA, transB, tempmm, tempnn, tempkn, A.mb, alpha, A(m, k), ldam, /* lda * Z */ B(k, n), ldbk, /* ldb * Y */ zbeta, C(m, n), ldcm); /* ldc * Y */ } } /* * A: PlasmaNoTrans / B: Plasma[Conj]Trans */ else { ldbn = BLKLDD(B, n); for (k = 0; k < A.nt; k++) { tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; zbeta = k == 0 ? beta : zone; QUARK_CORE_zgemm( plasma->quark, &task_flags, transA, transB, tempmm, tempnn, tempkn, A.mb, alpha, A(m, k), ldam, /* lda * Z */ B(n, k), ldbn, /* ldb * Z */ zbeta, C(m, n), ldcm); /* ldc * Y */ } } } /* * A: Plasma[Conj]Trans / B: PlasmaNoTrans */ else { if (transB == PlasmaNoTrans) { for (k = 0; k < A.mt; k++) { tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); zbeta = k == 0 ? beta : zone; QUARK_CORE_zgemm( plasma->quark, &task_flags, transA, transB, tempmm, tempnn, tempkm, A.mb, alpha, A(k, m), ldak, /* lda * X */ B(k, n), ldbk, /* ldb * Y */ zbeta, C(m, n), ldcm); /* ldc * Y */ } } /* * A: Plasma[Conj]Trans / B: Plasma[Conj]Trans */ else { ldbn = BLKLDD(B, n); for (k = 0; k < A.mt; k++) { tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; ldak = BLKLDD(A, k); zbeta = k == 0 ? beta : zone; QUARK_CORE_zgemm( plasma->quark, &task_flags, transA, transB, tempmm, tempnn, tempkm, A.mb, alpha, A(k, m), ldak, /* lda * X */ B(n, k), ldbn, /* ldb * Z */ zbeta, C(m, n), ldcm); /* ldc * Y */ } } } } } }
/***************************************************************************//** * Parallel Reduction from BAND tridiagonal to the final condensed form - dynamic scheduler **/ void plasma_pzhbrdt_quark(PLASMA_enum uplo, PLASMA_desc A, double *D, double *E, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; #ifdef COMPLEX static PLASMA_Complex64_t zone = (PLASMA_Complex64_t) 1.0; static double dzero = (double) 0.0; PLASMA_Complex64_t ztmp; double absztmp; #endif PLASMA_Complex64_t *C, *S; int blksweep, lcsweep, blkid, lcNB; int N, NB, NT, grsiz, lcgrsiz; int i; size_t eltsize = plasma_element_size(A.dtyp); plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); NT = A.nt; N = A.m; NB = A.mb; /* Quick return */ if (N == 0){ return; } if (NB == 0) { memset(D, 0, N*sizeof(double)); memset(E, 0, (N-1)*sizeof(double)); #ifdef COMPLEX for (i=0; i<N; i++) D[i] = cabs(*A(i,i)); #else for (i=0; i<N; i++) D[i] = *A(i,i); #endif return; } /* * Barrier is used because the bulge have to wait until * the reduction to band has been finish. * otherwise, I can remove this BARRIER when I integrate * the function dependencies link inside the reduction to * band. Keep in min the case when NB=1, where no bulge-chasing. */ /***************************************************************/ QUARK_Barrier(plasma->quark); tblg = -Wtimming(); /***************************************************************/ /* * Case NB=1 ==> matrix is already Bidiagonal. no need to bulge. * Make diagonal and superdiagonal elements real, storing them in * D and E. if PlasmaLower, first transform lower bidiagonal form * to upper bidiagonal by applying plane rotations/ Householder * from the left, overwriting superdiagonal elements then make * elements real of the resulting upper Bidiagonal. if PlasmaUpper * then make its elements real. For Q, PT: ZSCAL should be done * in case of WANTQ. */ if (NB == 1){ memset(D, 0, N *sizeof(double)); memset(E, 0, (N-1)*sizeof(double)); #ifdef COMPLEX if(uplo==PlasmaLower){ for (i=0; i<N; i++) { D[i] = creal( *A(i, i) ); /* diag value */ if( i < (N-1)) { /* lower off-diag value */ ztmp = *A((i+1),i); absztmp = cabs(ztmp); *A((i+1),i) = absztmp; E[i] = absztmp; if(absztmp != dzero) ztmp = (PLASMA_Complex64_t) (ztmp / absztmp); else ztmp = zone; if(i<(N-2)) *A((i+2),(i+1)) = *A((i+2),(i+1)) * ztmp; /* for Q: ZSCAL should be done in case of WANTQ */ } } } else { /* PlasmaUpper */ for (i=0; i<N; i++) { D[i] = creal( *A(i,i) ); /* diag value*/ if(i<(N-1)) { /* lower off-diag value */ ztmp = *A(i, (i+1)); absztmp = cabs(ztmp); *A(i,(i+1)) = absztmp; E[i] = absztmp; if(absztmp != dzero) ztmp = (PLASMA_Complex64_t) (ztmp / absztmp); else ztmp = zone; if(i<(N-2)) *A((i+1),(i+2)) = *A((i+1),(i+2)) * ztmp; /* for Q: ZSCAL should be done in case of WANTQ. HERE NEED THE multiply by CONJ(T) */ } } } /* end PlasmaUpper*/ #else if( uplo == PlasmaLower ){ for (i=0; i < N-1; i++) { D[i] = *A(i, i); E[i] = *A(i+1, i); } D[i] = *A(i, i); } else { for (i=0; i < N-1; i++) { D[i] = *A(i, i ); E[i] = *A(i, i+1); } D[i] = *A(i, i); } #endif return; } /* Case N<NB ==> matrix is very small and better to call lapack XHETRD. */ if( N <= 0 ) /* this will be removed we don t need it. */ { PLASMA_Complex64_t *work, *TTau; int info, ldwork = N*N; work = (PLASMA_Complex64_t *) plasma_shared_alloc(plasma, ldwork, PlasmaComplexDouble); TTau = (PLASMA_Complex64_t *) plasma_shared_alloc(plasma, N, PlasmaComplexDouble); info = LAPACKE_zhetrd_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, A(0,0), A.lm, D, E, TTau, work, ldwork); plasma_shared_free(plasma, (void*) work); plasma_shared_free(plasma, (void*) TTau); if( info == 0 ) sequence->status = PLASMA_SUCCESS; else plasma_sequence_flush(plasma->quark, sequence, request, info); return; } /* General case NB > 1 && N > NB */ C = (PLASMA_Complex64_t *) plasma_shared_alloc(plasma, N, PlasmaComplexDouble); S = (PLASMA_Complex64_t *) plasma_shared_alloc(plasma, N, PlasmaComplexDouble); /*************************************************************************** * START BULGE CHASING CODE **************************************************************************/ /* * Initialisation of local parameter. those parameter should be * input or tuned parameter. */ grsiz = 1; if( NB > 160 ) { grsiz = 1; } else if( NB > 100 ) { grsiz = 1; /* if( N < 5000 ) grsiz = 1; else grsiz = 2; */ } else { grsiz = 2; } grsiz = max(1, grsiz); /*grsiz=1;*/ /*printf(" Version -dp- N %5d NB %5d lcNB %5d grsiz %5d A.ln %5d A.nb %5d \n",N,NB,lcNB,grsiz,A.ln,A.nb);*/ for (blksweep = 0; blksweep<NT; blksweep++){ lcNB = blksweep == NT-1 ? A.n-blksweep*A.nb : A.nb; /*printf(" Version -dp- N %5d NB %5d lcNB %5d grsiz %5d blksweep%5d NT %5d \n",N,NB,lcNB,grsiz,blksweep,NT);*/ for (lcsweep = 0; lcsweep<lcNB; lcsweep++){ for (blkid = blksweep; blkid<NT; blkid=blkid+grsiz){ lcgrsiz = (blkid+1) < NT ? grsiz : NT-blkid; /*printf(" Version -dp- N %5d NB %5d lcNB %5d grsiz %5d lcgrsiz %5d blkid %5d \n",N,NB,lcNB,grsiz,lcgrsiz,blkid);*/ QUARK_CORE_ztrdalg_v2( plasma->quark, &task_flags, uplo, &A, C, S, lcgrsiz, lcsweep, blkid, blksweep); } } } /* * Barrier used only for now, to be sure that everything * is done before copying the D and E and free workspace. * this will be removed later when D and E are directly filled * during the bulge process. */ QUARK_Barrier(plasma->quark); tblg += Wtimming(); printf(" done with bulge %lf \n\n\n",tblg); plasma_shared_free(plasma, (void*) C); plasma_shared_free(plasma, (void*) S); /* * STORE THE RESULTING diagonal/off-diagonal in D AND E */ memset(D, 0, N *sizeof(double)); memset(E, 0, (N-1)*sizeof(double)); /* Make diagonal and superdiagonal elements real, * storing them in D and E */ /* In complex case, the off diagonal element are * not necessary real. we have to make off-diagonal * elements real and copy them to E. * When using HouseHolder elimination, * the ZLARFG give us a real as output so, all the * diagonal/off-diagonal element except the last one are already * real and thus we need only to take the abs of the last * one. * */ #ifdef COMPLEX if(uplo==PlasmaLower){ for (i=0; i < N-1 ; i++) { D[i] = creal( *A(i,i) ); /* * Alternative for Householder case, all off-diag * are real except the last off-diag, where we * have to take the abs */ if(i<(N-2)) E[i] = creal(*A(i+1, i)); else E[i] = cabs( *A(i+1, i)); } D[i] = creal( *A(i, i) ); } else { /* PlasmaUpper */ for (i=0; i<N-1; i++) { D[i] = creal( *A(i,i) ); /* * Alternative for Householder case, all off-diag * are real except the last off-diag, where we * have to take the abs */ if( i < (N-2) ) E[i] = creal(*A(i, (i+1))); else E[i] = cabs(*A(i, (i+1))); } D[i] = creal( *A(i, i) ); } /* end PlasmaUpper */ #else if( uplo == PlasmaLower ){ for (i=0; i < N-1; i++) { D[i] = *A(i, i); E[i] = *A(i+1, i); } D[i] = *A(i, i); } else { for (i=0; i < N-1; i++) { D[i] = *A(i, i ); E[i] = *A(i, i+1); } D[i] = *A(i, i); } #endif } /* END FUNCTION */
/***************************************************************************//** * * @ingroup PLASMA_Complex64_t * * PLASMA_zgesvd - computes the singular value decomposition (SVD) of a complex * M-by-N matrix A, optionally computing the left and/or right singular * vectors. The SVD is written * * A = U * SIGMA * transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA * are the singular values of A; they are real and non-negative, and * are returned in descending order. The first min(m,n) columns of * U and V are the left and right singular vectors of A. * * Note that the routine returns V**T, not V. * Not LAPACK Compliant for now! * Note: Only PlasmaNoVec supported! ******************************************************************************* * * @param[in] jobu * Specifies options for computing all or part of the matrix U. * Intended usage: * = PlasmaVec: all M columns of U are returned in array U; * = PlasmaNoVec: no columns of U (no left singular vectors) are * computed. * Note: Only PlasmaNoVec supported! * * @param[in] jobvt * Specifies options for computing all or part of the matrix V**H. * Intended usage: * = PlasmaVec: all M columns of U are returned in array U; * = PlasmaNoVec: no columns of U (no left singular vectors) are * computed. * Note: Only PlasmaNoVec supported! * * @param[in] M * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= 0. * * @param[in,out] A * On entry, the M-by-N matrix A. * On exit, * if JOBU = 'O', A is overwritten with the first min(m,n) * columns of U (the left singular vectors, * stored columnwise); * if JOBVT = 'O', A is overwritten with the first min(m,n) * rows of V**H (the right singular vectors, * stored rowwise); * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A * are destroyed. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[out] S * The double precision singular values of A, sorted so that S(i) >= S(i+1). * * @param[out] U * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. * If JOBU = 'A', U contains the M-by-M unitary matrix U; * if JOBU = 'S', U contains the first min(m,n) columns of U * (the left singular vectors, stored columnwise); * if JOBU = 'N' or 'O', U is not referenced. * * @param[in] LDU * The leading dimension of the array U. LDU >= 1; if * JOBU = 'S' or 'A', LDU >= M. * * @param[out] VT * If JOBVT = 'A', VT contains the N-by-N unitary matrix * V**H; * if JOBVT = 'S', VT contains the first min(m,n) rows of * V**H (the right singular vectors, stored rowwise); * if JOBVT = 'N' or 'O', VT is not referenced. * * @param[in] LDVT * The leading dimension of the array VT. LDVT >= 1; if * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). * * @param[in, out] descT * On entry, descriptor as return by PLASMA_Alloc_Workspace_zgesvd * On exit, contains auxiliary factorization data. * * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * * @sa PLASMA_zgesvd_Tile * @sa PLASMA_zgesvd_Tile_Async * @sa PLASMA_cgesvd * @sa PLASMA_dgesvd * @sa PLASMA_sgesvd * ******************************************************************************/ int PLASMA_zgesvd(PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N, PLASMA_Complex64_t *A, int LDA, double *S, PLASMA_Complex64_t *U, int LDU, PLASMA_Complex64_t *VT, int LDVT, PLASMA_desc *descT) { int NB, IB, IBNB, minMN, MT, NT, minMTNT; int status; plasma_context_t *plasma; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; PLASMA_desc descA, descU, descVT; plasma = plasma_context_self(); if (plasma == NULL) { plasma_fatal_error("PLASMA_zgesvd", "PLASMA not initialized"); return PLASMA_ERR_NOT_INITIALIZED; } /* Tune NB & IB depending on M & N; Set NBNB */ status = plasma_tune(PLASMA_FUNC_ZGESVD, M, N, 0); if (status != PLASMA_SUCCESS) { plasma_error("PLASMA_zgesvd", "plasma_tune() failed"); return status; } /* Set MT, NT */ NB = PLASMA_NB; IB = PLASMA_IB; IBNB = IB*NB; MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); minMN = min(M,N); minMTNT = min(MT,NT); /* Check input arguments */ if (jobu != PlasmaNoVec && jobu !=PlasmaVec) { plasma_error("PLASMA_zgesvd", "illegal value of jobu"); return -1; } if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) { plasma_error("PLASMA_zgesvd", "illegal value of jobvt"); return -2; } if (M < 0) { plasma_error("PLASMA_zgesvd", "illegal value of M"); return -3; } if (N < 0) { plasma_error("PLASMA_zgesvd", "illegal value of N"); return -4; } if (LDA < max(1, M)) { plasma_error("PLASMA_zgesvd", "illegal value of LDA"); return -6; } if (LDU < 1) { plasma_error("PLASMA_zgesvd", "illegal value of LDU"); return -9; } if (LDVT < 1) { plasma_error("PLASMA_zgesvd", "illegal value of LDVT"); return -11; } if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || ( descT->m != MT*IB ) || (descT->n != NT*NB) ) { plasma_error("PLASMA_zgesvd", "invalid T descriptor"); return -12; } /* Quick return */ if (min(M, N) == 0) { return PLASMA_SUCCESS; } if (jobu == PlasmaVec) { plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version"); return -1; } if (jobvt == PlasmaVec) { plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version"); return -2; } plasma_sequence_create(plasma, &sequence); if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) { plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) ); if (jobu == PlasmaVec){ plasma_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU))); }