Beispiel #1
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zlaswp_Tile_Async - performs a series of row interchanges
 *  on the matrix A.  One row interchange is initiated for each of
 *  rows K1 through K2 of A.
 *  Non-blocking equivalent of PLASMA_zlaswp_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlaswp
 * @sa PLASMA_zlaswp_Tile
 * @sa PLASMA_claswp_Tile_Async
 * @sa PLASMA_dlaswp_Tile_Async
 * @sa PLASMA_slaswp_Tile_Async
 * @sa PLASMA_zgetrf_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zlaswp_Tile_Async(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    if ( (K1 != 1) || (K2 != descA.m) ) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid K1 or K2 (1..M is the only interval supported right now)");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    plasma_dynamic_call_3(
        plasma_pzbarrier_tl2pnl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    /* swap */
    plasma_dynamic_call_5(
        plasma_pzlaswp,
        PLASMA_desc, descA,
        int *,       IPIV,
        int,         INCX,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);
    
    plasma_dynamic_call_3(
        plasma_pzbarrier_pnl2tl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    return PLASMA_SUCCESS;
}
Beispiel #2
0
/***************************************************************************//**
 *
 * @ingroup double_Tile_Async
 *
 *  PLASMA_dsygst_Tile_Async - reduces a complex Hermitian-definite
 *  generalized eigenproblem to standard form.
 *  If PlasmaItype == 1, the problem is A*x = lambda*B*x, and A is
 *  overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
 *  If PlasmaItype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x
 *  = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.  B must
 *  have been previously factorized as U**T*U or L*L**T by
 *  PLASMA_DPOTRF.
 *  ONLY PlasmaItype == 1 and PlasmaLower supported!
 *  Non-blocking equivalent of PLASMA_dsygst_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_dsygst
 * @sa PLASMA_dsygst_Tile
 * @sa PLASMA_chegst_Tile_Async
 * @sa PLASMA_dsygst_Tile_Async
 * @sa PLASMA_ssygst_Tile_Async
 * @sa PLASMA_dsygv_Tile_Async
 *
 ******************************************************************************/
int PLASMA_dsygst_Tile_Async(PLASMA_enum itype, PLASMA_enum uplo, 
                             PLASMA_desc *A, 
                             PLASMA_desc *B,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA = *A;
    PLASMA_desc descB = *B;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_dsygst_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    
    /* 
     * Transform Hermitian-definite generalized eigenproblem 
     * to standard form
     */
    plasma_dynamic_call_6(plasma_pdsygst,
        PLASMA_enum, itype,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        PLASMA_desc, descB,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
Beispiel #3
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t_Tile_Async
 *
 *  PLASMA_cpotrf_Tile_Async - Computes the Cholesky factorization of a symmetric
 *  positive definite or Hermitian positive definite matrix.
 *  Non-blocking equivalent of PLASMA_cpotrf_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_cpotrf
 * @sa PLASMA_cpotrf_Tile
 * @sa PLASMA_cpotrf_Tile_Async
 * @sa PLASMA_dpotrf_Tile_Async
 * @sa PLASMA_spotrf_Tile_Async
 * @sa PLASMA_cpotrs_Tile_Async
 *
 ******************************************************************************/
int PLASMA_cpotrf_Tile_Async(PLASMA_enum uplo, PLASMA_desc *A,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cpotrf_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_cpotrf_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_cpotrf_Tile", "illegal value of uplo");
        return plasma_request_fail(sequence, request, -1);
    }
    /* Quick return */
/*
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
*/
    plasma_parallel_call_4(plasma_pcpotrf,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
Beispiel #4
0
/***************************************************************************//**
 *
 * @ingroup float_Tile_Async
 *
 *  PLASMA_splgsy_Tile_Async - Generate a random hermitian matrix by tiles.
 *  Non-blocking equivalent of PLASMA_splgsy_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_splgsy
 * @sa PLASMA_splgsy_Tile
 * @sa PLASMA_cplgsy_Tile_Async
 * @sa PLASMA_dplgsy_Tile_Async
 * @sa PLASMA_splgsy_Tile_Async
 * @sa PLASMA_splgsy_Tile_Async
 * @sa PLASMA_splgsy_Tile_Async
 *
 ******************************************************************************/
int PLASMA_splgsy_Tile_Async( float          bump,
                              PLASMA_desc     *A,
                              unsigned long long int seed,
                              PLASMA_sequence *sequence, 
                              PLASMA_request  *request)
{
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_splgsy_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_splgsy_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Quick return */
    if (min( descA.m, descA.n ) == 0)
        return PLASMA_SUCCESS;

    plasma_parallel_call_5(plasma_psplgsy,
        float, bump,
        PLASMA_desc,        descA,
        unsigned long long int, seed,
        PLASMA_sequence*,   sequence,
        PLASMA_request*,    request);

    return PLASMA_SUCCESS;
}
Beispiel #5
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zgetri_Tile - Computes the inverse of a matrix using the LU factorization
 *  computed by PLASMA_zgetrf.
 *  This method inverts U and then computes inv(A) by solving the system
 *  inv(A)*L = inv(U) for inv(A).
 *  Tile equivalent of PLASMA_zgetri().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          On entry, the triangular factor L or U from the
 *          factorization A = P*L*U as computed by PLASMA_zgetrf.
 *          On exit, if return value = 0, the inverse of the original
 *          matrix A.
 *
 * @param[in] IPIV
 *          The pivot indices that define the permutations
 *          as returned by PLASMA_zgetrf.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval >0 if i, the (i,i) element of the factor U is
 *                exactly zero; The matrix is singular
 *                and its inverse could not be computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri
 * @sa PLASMA_zgetri_Tile_Async
 * @sa PLASMA_cgetri_Tile
 * @sa PLASMA_dgetri_Tile
 * @sa PLASMA_sgetri_Tile
 * @sa PLASMA_zgetrf_Tile
 *
 ******************************************************************************/
int PLASMA_zgetri_Tile(PLASMA_desc *A, int *IPIV)
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descW;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    plasma_sequence_create(plasma, &sequence);

    /* Allocate workspace */
    PLASMA_Alloc_Workspace_zgetri_Tile_Async(A, &descW);

    PLASMA_zgetri_Tile_Async(A, IPIV, &descW, sequence, &request);
    plasma_dynamic_sync();
    plasma_desc_mat_free(&(descW));

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
Beispiel #6
0
/***************************************************************************//**
 *  Main thread control
 **/
void *plasma_parallel_section(void *plasma_ptr)
{
    plasma_context_t *plasma = (plasma_context_t*)(plasma_ptr);
    PLASMA_enum action;

    /* Set thread affinity for the worker */
    plasma_setaffinity(plasma->thread_bind[plasma_rank(plasma)]);

    plasma_barrier(plasma);
    while(1) {
        pthread_mutex_lock(&plasma->action_mutex);
        while ((action = plasma->action) == PLASMA_ACT_STAND_BY)
            pthread_cond_wait(&plasma->action_condt, &plasma->action_mutex);
        pthread_mutex_unlock(&plasma->action_mutex);
        plasma_barrier(plasma);

        switch (action) {
            case PLASMA_ACT_PARALLEL:
                plasma->parallel_func_ptr(plasma);
                break;
            case PLASMA_ACT_DYNAMIC:
                QUARK_Worker_Loop(plasma->quark, plasma_rank(plasma));
                break;
            case PLASMA_ACT_FINALIZE:
                return NULL;
            default:
                plasma_fatal_error("plasma_parallel_section", "undefined action");
                return NULL;
        }
        plasma_barrier(plasma);
    }

    plasma_unsetaffinity();
    return NULL;
}
Beispiel #7
0
/** ****************************************************************************
 *
 * @ingroup InPlaceTransformation
 *
 *  PLASMA_dgecfi convert the matrice A in place from format f_in to
 *  format f_out
 *
 *******************************************************************************
 *
 * @param[in] m
 *         Number of rows of matrix A
 *
 * @param[in] n
 *         Number of columns of matrix A
 *
 * @param[in,out] A
 *         Matrix of size L*m*n
 *
 * @param[in] f_in
 *         Original format of the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] imb
 *         Number of rows of each block in original format
 *
 * @param[in] inb
 *         Number of columns of each block in original format
 *
 * @param[in] f_out
 *         Format requested for the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] omb
 *         Number of rows of each block in requested format
 *
 * @param[in] onb
 *         Number of columns of each block in requested format
 *
 *******************************************************************************
 *
 * @sa PLASMA_dgecfi_Async
 *
 ******************************************************************************/
int PLASMA_dgecfi(int m, int n, double *A,
                  PLASMA_enum f_in,  int imb, int inb,
                  PLASMA_enum f_out, int omb, int onb) 
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error(__func__, "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    plasma_sequence_create(plasma, &sequence);

    PLASMA_dgecfi_Async( m, n, A,
                         f_in,  imb, inb,
                         f_out, omb, onb,
                         sequence, &request);
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return status;
}
Beispiel #8
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zgetrf_nopiv_Tile_Async - Computes the tile LU factorization of a
 *  matrix.  Non-blocking equivalent of PLASMA_zgetrf_nopiv_Tile().  May return
 *  before the computation is finished.  Allows for pipelining of operations ar
 *  runtime.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix to be factored.
 *          On exit, the tile factors L and U from the factorization.
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetrf_nopiv
 * @sa PLASMA_zgetrf_nopiv_Tile
 * @sa PLASMA_cgetrf_nopiv_Tile_Async
 * @sa PLASMA_dgetrf_nopiv_Tile_Async
 * @sa PLASMA_sgetrf_nopiv_Tile_Async
 * @sa PLASMA_zgetrs_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zgetrf_nopiv_Tile_Async(PLASMA_desc *A,
                                   PLASMA_sequence *sequence,
                                   PLASMA_request *request)
{
    PLASMA_desc descA;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetrf_nopiv_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    } else {
        descA = *A;
    }

    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_zgetrf_nopiv_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    plasma_dynamic_call_3(plasma_pzgetrf_nopiv,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
Beispiel #9
0
/***************************************************************************//**
 *
 **/
int plasma_alloc_ibnb_tile(int M, int N, PLASMA_enum func, int type, PLASMA_desc **desc)
{
    int status;
    int IB, NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb_tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT & allocate */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        ((func == PLASMA_FUNC_SGELS)  ||
         (func == PLASMA_FUNC_DGELS)  ||
         (func == PLASMA_FUNC_CGELS)  ||
         (func == PLASMA_FUNC_ZGELS)  ||
         (func == PLASMA_FUNC_SGESVD) ||
         (func == PLASMA_FUNC_DGESVD) ||
         (func == PLASMA_FUNC_CGESVD) ||
         (func == PLASMA_FUNC_ZGESVD)))
        NT *= 2;

    /* Allocate and initialize descriptor */
    *desc = (PLASMA_desc*)malloc(sizeof(PLASMA_desc));
    if (*desc == NULL) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    **desc = plasma_desc_init(type, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

    /* Allocate matrix */
    if (plasma_desc_mat_alloc(*desc)) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    /* Check that everything is ok */
    status = plasma_desc_check(*desc);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "invalid descriptor");
        return status;
    }
    return PLASMA_SUCCESS;
}
Beispiel #10
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive
 *  definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky
 *  factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf.
 *
 *******************************************************************************
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in] NRHS
 *          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
 *
 * @param[in] A
 *          The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H,
 *          computed by PLASMA_zpotrf.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in,out] B
 *          On entry, the N-by-NRHS right hand side matrix B.
 *          On exit, if return value = 0, the N-by-NRHS solution matrix X.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_zpotrs_Tile
 * @sa PLASMA_zpotrs_Tile_Async
 * @sa PLASMA_cpotrs
 * @sa PLASMA_dpotrs
 * @sa PLASMA_spotrs
 * @sa PLASMA_zpotrf
 *
 ******************************************************************************/
int PLASMA_zpotrs(PLASMA_enum uplo, int N, int NRHS,
                  PLASMA_Complex64_t *A, int LDA,
                  PLASMA_Complex64_t *B, int LDB)
{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zpotrs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_zpotrs", "illegal value of uplo");
        return -1;
    }
    if (N < 0) {
        plasma_error("PLASMA_zpotrs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_zpotrs", "illegal value of NRHS");
        return -3;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zpotrs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_zpotrs", "illegal value of LDB");
        return -7;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zpotrs", "plasma_tune() failed");
        return status;
    }

    /* Set NT & NTRHS */
    NB    = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, N, N   , plasma_desc_mat_free(&(descA)) );
        plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
Beispiel #11
0
/***************************************************************************//**
 *
 * @ingroup float
 *
 *  PLASMA_splgsy - Generate a random hermitian matrix by tiles.
 *
 *******************************************************************************
 *
 * @param[in] bump
 *          The value to add to the diagonal to be sure 
 *          to have a positive definite matrix.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[out] A
 *          On exit, The random hermitian matrix A generated.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,M).
 *
 * @param[in] seed
 *          The seed used in the random generation.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_splgsy_Tile
 * @sa PLASMA_splgsy_Tile_Async
 * @sa PLASMA_cplgsy
 * @sa PLASMA_dplgsy
 * @sa PLASMA_splgsy
 * @sa PLASMA_splrnt
 * @sa PLASMA_splgsy
 *
 ******************************************************************************/
int PLASMA_splgsy( float bump, int N,
                   float *A, int LDA,
                   unsigned long long int seed )
{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_splgsy", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_splgsy", "illegal value of LDA");
        return -4;
    }
    /* Quick return */
    if (max(0, N) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_SGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_splgsy", "plasma_tune() failed");
        return status;
    }
    
    /* Set NT */
    NB = PLASMA_NB;
    plasma_sequence_create(plasma, &sequence);
    
    descA = plasma_desc_init(
        PlasmaRealFloat, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    descA.mat = A;

    /* Call the tile interface */
    PLASMA_splgsy_Tile_Async( bump, &descA, seed, sequence, &request );

    plasma_siptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);

    return status;
}
Beispiel #12
0
/***************************************************************************//**
 *
 **/
int plasma_alloc_ibnb(int M, int N, PLASMA_enum func, int type, void **memptr)
{
    size_t size;
    int status;
    int IB, NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }
    /* Set MT & NT & allocate */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        (func == PLASMA_FUNC_SGELS  ||
         func == PLASMA_FUNC_DGELS  ||
         func == PLASMA_FUNC_CGELS  ||
         func == PLASMA_FUNC_ZGELS  ||
         func == PLASMA_FUNC_SGESVD ||
         func == PLASMA_FUNC_DGESVD ||
         func == PLASMA_FUNC_CGESVD ||
         func == PLASMA_FUNC_ZGESVD ))
        NT *= 2;

    size = (size_t)MT*NT*IB*NB * plasma_element_size(type);
    if (size <= 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }
//  status = posix_memalign(memptr, STANDARD_PAGE_SIZE, size);
    *memptr = malloc(size);
//  if (status != 0) {
    if (*memptr == NULL) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    return PLASMA_SUCCESS;
}
Beispiel #13
0
/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Dealloc_Handle - Deallocate workspace handle allocated by any workspace allocation routine.
 *
 *******************************************************************************
 *
 * @param[in] handle
 *          Workspace handle
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 ******************************************************************************/
int PLASMA_Dealloc_Handle(void **handle)
{
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Dealloc_Handle", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (*handle == NULL) {
        plasma_error("PLASMA_Dealloc_Handle", "attempting to deallocate a NULL handle");
        return PLASMA_ERR_UNALLOCATED;
    }
    free(*handle);
    *handle = NULL;
    return PLASMA_SUCCESS;
}
Beispiel #14
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zlansy_Tile - Tile equivalent of PLASMA_zlansy().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] norm
 *          = PlasmaMaxNorm: Max norm
 *          = PlasmaOneNorm: One norm
 *          = PlasmaInfNorm: Infinity norm
 *          = PlasmaFrobeniusNorm: Frobenius norm
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] A
 *          On entry, the triangular factor U or L.
 *          On exit, if UPLO = 'U', the upper triangle of A is
 *          overwritten with the upper triangle of the product U * U';
 *          if UPLO = 'L', the lower triangle of A is overwritten with
 *          the lower triangle of the product L' * L.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlansy
 * @sa PLASMA_zlansy_Tile_Async
 * @sa PLASMA_clansy_Tile
 * @sa PLASMA_dlansy_Tile
 * @sa PLASMA_slansy_Tile
 *
 ******************************************************************************/
double PLASMA_zlansy_Tile(PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc *A)
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    double value;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlansy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    plasma_sequence_create(plasma, &sequence);
    PLASMA_zlansy_Tile_Async(norm, uplo, A, &value, sequence, &request);
    plasma_dynamic_sync();
    plasma_sequence_destroy(plasma, sequence);
    return value;
}
Beispiel #15
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile
 *
 *  PLASMA_zlaswp_Tile - performs a series of row interchanges on the matrix A.
 *  One row interchange is initiated for each of rows K1 through K2 of A.
 *  Tile equivalent of PLASMA_zlaswp().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] A
 *          The tile factors L and U from the factorization, computed by PLASMA_zgetrf.
 *
 * @param[in] K1
 *          The first element of IPIV for which a row interchange will
 *          be done.
 *
 * @param[in] K2
 *          The last element of IPIV for which a row interchange will
 *          be done.
 *
 * @param[in] IPIV
 *          The pivot indices from PLASMA_zgetrf.
 *
 * @param[in] INCX
 *          The increment between successive values of IPIV. If IPIV
 *          is negative, the pivots are applied in reverse order.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlaswp
 * @sa PLASMA_zlaswp_Tile_Async
 * @sa PLASMA_claswp_Tile
 * @sa PLASMA_dlaswp_Tile
 * @sa PLASMA_slaswp_Tile
 * @sa PLASMA_zgetrf_Tile
 *
 ******************************************************************************/
int PLASMA_zlaswp_Tile(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX)
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    plasma_sequence_create(plasma, &sequence);
    PLASMA_zlaswp_Tile_Async(A, K1, K2, IPIV, INCX, sequence, &request);
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
Beispiel #16
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t_Tile
 *
 *  PLASMA_cpotrf_Tile - Computes the Cholesky factorization of a symmetric positive definite
 *  or Hermitian positive definite matrix.
 *  Tile equivalent of PLASMA_cpotrf().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] A
 *          On entry, the symmetric positive definite (or Hermitian) matrix A.
 *          If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A
 *          contains the upper triangular part of the matrix A, and the strictly lower triangular
 *          part of A is not referenced.
 *          If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower
 *          triangular part of the matrix A, and the strictly upper triangular part of A is not
 *          referenced.
 *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
 *          A = U**H*U or A = L*L**H.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
 *               factorization could not be completed, and the solution has not been computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_cpotrf
 * @sa PLASMA_cpotrf_Tile_Async
 * @sa PLASMA_cpotrf_Tile
 * @sa PLASMA_dpotrf_Tile
 * @sa PLASMA_spotrf_Tile
 * @sa PLASMA_cpotrs_Tile
 *
 ******************************************************************************/
int PLASMA_cpotrf_Tile(PLASMA_enum uplo, PLASMA_desc *A)
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    plasma_sequence_create(plasma, &sequence);
    PLASMA_cpotrf_Tile_Async(uplo, A, sequence, &request);
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
Beispiel #17
0
/***************************************************************************//**
 *
 * @ingroup float_Tile
 *
 *  PLASMA_splgsy_Tile - Generate a random hermitian matrix by tiles.
 *  Tile equivalent of PLASMA_splgsy().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] bump
 *          The value to add to the diagonal to be sure 
 *          to have a positive definite matrix.
 *
 * @param[in] A
 *          On exit, The random hermitian matrix A generated.
 *
 * @param[in] seed
 *          The seed used in the random generation.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 *******************************************************************************
 *
 * @sa PLASMA_splgsy
 * @sa PLASMA_splgsy_Tile_Async
 * @sa PLASMA_cplgsy_Tile
 * @sa PLASMA_dplgsy_Tile
 * @sa PLASMA_splgsy_Tile
 * @sa PLASMA_splrnt_Tile
 * @sa PLASMA_splgsy_Tile
 *
 ******************************************************************************/
int PLASMA_splgsy_Tile( float bump, PLASMA_desc *A,
                        unsigned long long int seed )
{
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    plasma_sequence_create(plasma, &sequence);
    PLASMA_splgsy_Tile_Async( bump, A, seed, sequence, &request );
    plasma_dynamic_sync();
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
Beispiel #18
0
/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Dealloc_Handle_Tile - Deallocate Tile workspace handle allocated by any tile workspace allocation routine.
 *
 *******************************************************************************
 *
 * @param[in] desc
 *          Descriptot handle
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 ******************************************************************************/
int PLASMA_Dealloc_Handle_Tile(PLASMA_desc **desc)
{
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Dealloc_Handle_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (*desc == NULL) {
        plasma_error("PLASMA_Dealloc_Handle_Tile", "attempting to deallocate a NULL descriptor");
        return PLASMA_ERR_UNALLOCATED;
    }
    if ((*desc)->mat == NULL) {
        plasma_error("PLASMA_Dealloc_Handle_Tile", "attempting to deallocate a NULL pointer");
        return PLASMA_ERR_UNALLOCATED;
    }
    free((*desc)->mat);
    free(*desc);
    *desc = NULL;
    return PLASMA_SUCCESS;
}
Beispiel #19
0
/***************************************************************************//**
 *
 **/
int plasma_alloc_ipiv(int M, int N, PLASMA_enum func, void **memptr)
{
    size_t size;
    int status;
    int NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ipiv", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ipiv", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }
    /* Set MT & NT & allocate */
    NB = PLASMA_NB;
    NT = (N%NB==0) ? (N/NB) : ((N/NB)+1);
    MT = (M%NB==0) ? (M/NB) : ((M/NB)+1);
    size = (size_t)MT*NT * NB * sizeof(int);
    if (size <= 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }
//  status = posix_memalign(memptr, CACHE_LINE_SIZE, size);
    *memptr = malloc(size);
//  if (status != 0) {
    if (*memptr == NULL) {
        plasma_error("plasma_alloc_ipiv", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    return PLASMA_SUCCESS;
}
Beispiel #20
0
/***************************************************************************//**
 *
 * @ingroup float_Tile_Async
 *
 *  PLASMA_ssytrd_Tile_Async - Computes all eigenvalues and,
 *  optionally, eigenvectors of a complex Hermitian matrix A using a
 *  two-stage approach:
 *  First stage: reduction to band tridiagonal form;
 *  Second stage: reduction from band to tridiagonal form.
 *
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_ssytrd
 * @sa PLASMA_ssytrd_Tile
 * @sa PLASMA_chetrd_Tile_Async
 * @sa PLASMA_dsytrd_Tile_Async
 * @sa PLASMA_ssytrd_Tile_Async
 *
 ******************************************************************************/
int PLASMA_ssytrd_Tile_Async(PLASMA_enum jobz, PLASMA_enum uplo,
                            PLASMA_desc *A,
                            float *D,
                            float *E,
                            PLASMA_desc *T,
                            PLASMA_desc *Q,
                            PLASMA_sequence *sequence, PLASMA_request *request)
{
    int NB, IB, IBNB, NT;
    PLASMA_desc descA = *A;
    PLASMA_desc descT = *T;

    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Set NT & NTRHS */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    NT   = (descA.ln%NB==0) ? (descA.ln/NB) : (descA.ln/NB+1);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ( (jobz == PlasmaVec) && (plasma_desc_check(Q) != PLASMA_SUCCESS) ) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "illegal value of jobz");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (descA.m != descA.n) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "matrix need to be square");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
   if (descA.nb != descA.mb) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "computing the eigenvectors is not supported in this version");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ( (jobz == PlasmaVec) && (Q->nb != Q->mb) ) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Reduction to tridiagonal form
     * with a two-stage approach.
     */

    /* Reduction to BAND tridiagonal form
     */
    plasma_dynamic_call_5(plasma_pssyrbt,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        PLASMA_desc, descT,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    /* 
     * Build the Q of the first stage
     */
    /* if (jobz == PlasmaVec){ */
    /*    /\* Initialize Q to Identity *\/ */
    /*    plasma_dynamic_call_6(plasma_pslaset, */
    /*        PLASMA_enum, PlasmaUpperLower, */
    /*        float, 0.0, */
    /*        float, 1.0, */
    /*        PLASMA_desc, descQ, */
    /*        PLASMA_sequence*, sequence, */
    /*        PLASMA_request*, request); */
    /*    /\* Accumulate the transformations from the first stage*\/ */
    /*    plasma_dynamic_call_6(plasma_psorgtr, */
    /*        PLASMA_enum, uplo, */
    /*        PLASMA_desc, descA, */
    /*        PLASMA_desc, descQ, */
    /*        PLASMA_desc, descT, */
    /*        PLASMA_sequence*, sequence, */
    /*        PLASMA_request*, request); */
    /* } */

    /* Set the V's to zero before the 2nd stage (bulge chasing) */
    /*
    */
    plasma_dynamic_call_5(plasma_pslaset2,
        PLASMA_enum, uplo,
        float, 0.0,
        PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) 
                                         : plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    /* Reduction from BAND tridiagonal to the final condensed form
     */
    plasma_dynamic_call_7(plasma_pssbrdt,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        float*, D,
        float*, E,
        PLASMA_desc, descT,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);


    return PLASMA_SUCCESS;
}
Beispiel #21
0
/** ****************************************************************************
 *
 * @ingroup InPlaceTransformation
 *
 *  PLASMA_dgecfi_Async convert the matrice A in place from format f_in to
 *  format f_out
 *
 *******************************************************************************
 *
 * @param[in] m
 *         Number of rows of matrix A
 *
 * @param[in] n
 *         Number of columns of matrix A
 *
 * @param[in,out] A
 *         Matrix of size L*m*n
 *
 * @param[in] f_in
 *         Original format of the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] imb
 *         Number of rows of each block in original format
 *
 * @param[in] inb
 *         Number of columns of each block in original format
 *
 * @param[in] f_out
 *         Format requested for the matrix A. Must be part of (PlasmaCM, PlasmaRM,
 *         PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
 *
 * @param[in] omb
 *         Number of rows of each block in requested format
 *
 * @param[in] onb
 *         Number of columns of each block in requested format
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_dgecfi
 *
 ******************************************************************************/
int PLASMA_dgecfi_Async(int m, int n, double *A,
                        PLASMA_enum f_in,  int imb, int inb,
                        PLASMA_enum f_out, int omb, int onb,
                        PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    double *W = NULL;
    int im1, in1, om1, on1;
    size_t A11, A21, A12, A22;

    /* Check Plasma context */
    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error(__func__, "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check arguments */
    if(  ( f_in  != PlasmaCM ) && ( f_in  != PlasmaRM ) 
         && ( f_in  != PlasmaCCRB ) && ( f_in  != PlasmaRRRB ) 
         && ( f_in  != PlasmaCRRB ) && ( f_in  != PlasmaRCRB ) )
    {
        plasma_error(__func__, "Input format unknown");
        return -4;
    }
    if(  ( f_out  != PlasmaCM ) && ( f_out  != PlasmaRM ) 
         && ( f_out  != PlasmaCCRB ) && ( f_out  != PlasmaRRRB ) 
         && ( f_out  != PlasmaCRRB ) && ( f_out  != PlasmaRCRB ) )
    {
        plasma_error(__func__, "Input format unknown");
        return -7;
    }

    /* quick return */
    if( (f_in == f_out) && ( (f_in == PlasmaCM) || (f_in == PlasmaRM))
        && (imb == omb) && ( inb == onb ) ) {
        return PLASMA_SUCCESS;
    }

    if ( (f_in == PlasmaCM) || (f_in == PlasmaRM) )
    {
        if ( (f_out == PlasmaCM) || (f_out == PlasmaRM) ){
            imb = omb = PLASMA_NB;
            inb = onb = PLASMA_NB;
        } else {
            imb = omb;
            inb = onb;
        }
    } 
    else if ( (f_out == PlasmaCM) || (f_out == PlasmaRM) ) 
    {
        omb = imb;
        onb = inb;
    }

    /* calculate number of full blocks */
    im1 = (m / imb) * imb;
    in1 = (n / inb) * inb;
    om1 = (m / omb) * omb;
    on1 = (n / onb) * onb;

    /* separate the four submatrices A11, A12, A21, A22 */
    if( f_in == PlasmaCM ) {
        if( om1 < m ) {
            plasma_static_call_6(plasma_pdpack,
                                 int,                 m,
                                 int,                 on1,
                                 double*, A,
                                 int,                 (m-om1),
                                 PLASMA_sequence*,    sequence,
                                 PLASMA_request*,     request);
            if ( on1 < n) {
                plasma_static_call_6(plasma_pdpack,
                                     int,                 m,
                                     int,                 (n-on1),
                                     double*, &(A[m*on1]),
                                     int,                 (m-om1),
                                     PLASMA_sequence*,    sequence,
                                     PLASMA_request*,     request);
            }
        }
Beispiel #22
0
/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Finalize - Finalize PLASMA.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 ******************************************************************************/
int PLASMA_Finalize()
{
    int core;
    int status;
    void *exitcodep;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Finalize()", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Terminate the dynamic scheduler */
    plasma_dynamic_sync();

    /* Free quark structures */
    QUARK_Free(plasma->quark);

    /* Set termination action */
    pthread_mutex_lock(&plasma->action_mutex);
    plasma->action = PLASMA_ACT_FINALIZE;
    pthread_mutex_unlock(&plasma->action_mutex);
    pthread_cond_broadcast(&plasma->action_condt);

    /* Barrier and clear action */
    plasma_barrier(plasma);
    plasma->action = PLASMA_ACT_STAND_BY;

    // Join threads
    for (core = 1; core < plasma->world_size; core++) {
        status = pthread_join(plasma->thread_id[core], &exitcodep);
        if (status != 0) {
            plasma_fatal_error("PLASMA_Finalize", "pthread_join() failed");
            return status;
        }
    }
    plasma_barrier_finalize(plasma);
    plasma_barrier_bw_finalize(plasma);

    /* Unbind main thread */
    plasma_unsetaffinity();

    /* Destroy thread attributes */
    status = pthread_attr_destroy(&plasma->thread_attr);
    if (status != 0)
        plasma_fatal_error("PLASMA_Finalize", "pthread_attr_destroy() failed");

    /* Destroy topology */
    plasma_topology_finalize();

    status = plasma_context_remove(plasma, pthread_self());
    if (status != PLASMA_SUCCESS) {
        plasma_fatal_error("PLASMA_Finalize", "plasma_context_remove() failed");
        return status;
    }

    /* Restore the concurency */
    /* actually it's really bad, we shoulde set the concurrency only
     * if it's not already done and restore it only we had change it */
    pthread_setconcurrency( 0 );

    return PLASMA_SUCCESS;
}
Beispiel #23
0
/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Init_Affinity - Initialize PLASMA.
 *
 *******************************************************************************
 *
 * @param[in] cores
 *          Number of cores to use (threads to launch).
 *          If cores = 0, cores = PLASMA_NUM_THREADS if it is set, the
 *          system number of core otherwise.
 *
 * @param[in] coresbind
 *          Array to specify where to bind each thread.
 *          Each thread i is binded to coresbind[hwloc(i)] if hwloc is
 *          provided, or to coresbind[i] otherwise.
 *          If coresbind = NULL, coresbind = PLASMA_AFF_THREADS if it
 *          is set, the identity function otherwise.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 ******************************************************************************/
int PLASMA_Init_Affinity(int cores, int *coresbind)
{
    plasma_context_t *plasma;
    int status;
    int core;

    /* Create context and insert in the context map */
    plasma = plasma_context_create();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Init", "plasma_context_create() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    status = plasma_context_insert(plasma, pthread_self());
    if (status != PLASMA_SUCCESS) {
        plasma_fatal_error("PLASMA_Init", "plasma_context_insert() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    /* Init number of cores and topology */
    plasma_topology_init();

    /* Set number of cores */
    if ( cores < 1 ) {
        plasma->world_size = plasma_get_numthreads();
        if ( plasma->world_size == -1 ) {
            plasma->world_size = 1;
            plasma_warning("PLASMA_Init", "Could not find the number of cores: the thread number is set to 1");
        }
    }
    else
      plasma->world_size = cores;

    if (plasma->world_size <= 0) {
        plasma_fatal_error("PLASMA_Init", "failed to get system size");
        return PLASMA_ERR_NOT_FOUND;
    }
    /* Check if not more cores than the hard limit */
    if (plasma->world_size > CONTEXT_THREADS_MAX) {
        plasma_fatal_error("PLASMA_Init", "not supporting so many cores");
        return PLASMA_ERR_INTERNAL_LIMIT;
    }

    /* Get the size of each NUMA node */
    plasma->group_size = plasma_get_numthreads_numa();
    while ( ((plasma->world_size)%(plasma->group_size)) != 0 )
        (plasma->group_size)--;

    /* Initialize barriers */
    plasma_barrier_init(plasma);
    plasma_barrier_bw_init(plasma);

    /* Initialize default thread attributes */
    status = pthread_attr_init(&plasma->thread_attr);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_attr_init() failed");
        return status;
    }
    /* Set scope to system */
    status = pthread_attr_setscope(&plasma->thread_attr, PTHREAD_SCOPE_SYSTEM);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_attr_setscope() failed");
        return status;
    }
    /* Set concurrency */
    status = pthread_setconcurrency(plasma->world_size);
    if (status != 0) {
        plasma_fatal_error("PLASMA_Init", "pthread_setconcurrency() failed");
        return status;
    }
    /*  Launch threads */
    memset(plasma->thread_id,   0, CONTEXT_THREADS_MAX*sizeof(pthread_t));
    if (coresbind != NULL) {
        memcpy(plasma->thread_bind, coresbind, plasma->world_size*sizeof(int));
    }
    else {
        plasma_get_affthreads(plasma->thread_bind);
    }
    /* Assign rank and thread ID for the master */
    plasma->thread_rank[0] = 0;
    plasma->thread_id[0] = pthread_self();

    for (core = 1; core < plasma->world_size; core++) {
        plasma->thread_rank[core] = core;
        pthread_create(
            &plasma->thread_id[core],
            &plasma->thread_attr,
             plasma_parallel_section,
             (void*)plasma);
    }

    /* Ensure BLAS are sequential and set thread affinity for the master */
#if defined(PLASMA_WITH_MKL)
#if defined(__ICC) || defined(__INTEL_COMPILER)
    kmp_set_defaults("KMP_AFFINITY=disabled");
#endif
#endif

    /* Initialize the dynamic scheduler */
    plasma->quark =  QUARK_Setup(plasma->world_size);
    plasma_barrier(plasma);

    plasma_setlapack_sequential(plasma);

    return PLASMA_SUCCESS;
}
Beispiel #24
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t
 *
 *  PLASMA_csyrk - Performs one of the hermitian rank k operations
 *
 *    \f[ C = \alpha [ op( A ) \times conjfg( op( A )' )] + \beta C \f],
 *
 *  where op( X ) is one of
 *
 *    op( X ) = X  or op( X ) = conjfg( X' )
 *
 *  where alpha and beta are real scalars, C is an n-by-n hermitian
 *  matrix and A is an n-by-k matrix in the first case and a k-by-n
 *  matrix in the second case.
 *
 *******************************************************************************
 *
 * @param[in] uplo
 *          = PlasmaUpper: Upper triangle of C is stored;
 *          = PlasmaLower: Lower triangle of C is stored.
 *
 * @param[in] trans
 *          Specifies whether the matrix A is transposed or conjfugate transposed:
 *          = PlasmaNoTrans:   A is not transposed;
 *          = PlasmaTrans  :   A is transposed.
 *
 * @param[in] N
 *          N specifies the order of the matrix C. N must be at least zero.
 *
 * @param[in] K
 *          K specifies the number of columns of the matrix op( A ).
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha.
 *
 * @param[in] A
 *          A is a LDA-by-ka matrix, where ka is K when trans = PlasmaNoTrans,
 *          and is N otherwise.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA must be at least
 *          max( 1, N ), otherwise LDA must be at least max( 1, K ).
 *
 * @param[in] beta
 *          beta specifies the scalar beta
 *
 * @param[in,out] C
 *          C is a LDC-by-N matrix.
 *          On exit, the array uplo part of the matrix is overwritten
 *          by the uplo part of the updated matrix.
 *
 * @param[in] LDC
 *          The leading dimension of the array C. LDC >= max( 1, N ).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 *******************************************************************************
 *
 * @sa PLASMA_csyrk_Tile
 * @sa PLASMA_csyrk
 * @sa PLASMA_dsyrk
 * @sa PLASMA_ssyrk
 *
 ******************************************************************************/
int PLASMA_csyrk(PLASMA_enum uplo, PLASMA_enum trans, int N, int K,
                 PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
                 PLASMA_Complex32_t beta,  PLASMA_Complex32_t *C, int LDC)
{
    int NB;
    int Am, An;
    int status;
    PLASMA_desc descA, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_csyrk", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
        plasma_error("PLASMA_csyrk", "illegal value of uplo");
        return -1;
    }
    if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
        plasma_error("PLASMA_csyrk", "illegal value of trans");
        return -2;
    }
    if ( trans == PlasmaNoTrans ) { 
        Am = N; An = K;
    } else {
        Am = K; An = N;
    }
    if (N < 0) {
        plasma_error("PLASMA_csyrk", "illegal value of N");
        return -3;
    }
    if (K < 0) {
        plasma_error("PLASMA_csyrk", "illegal value of K");
        return -4;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_csyrk", "illegal value of LDA");
        return -7;
    }
    if (LDC < max(1, N)) {
        plasma_error("PLASMA_csyrk", "illegal value of LDC");
        return -10;
    }

    /* Quick return */
    if (N == 0 ||
        ((alpha == (PLASMA_Complex32_t)0.0 || K == 0.0) && beta == (PLASMA_Complex32_t)1.0))
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CSYRK, N, K, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_csyrk", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, N,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
    } else {
Beispiel #25
0
/***************************************************************************//**
 *
 * @ingroup float
 *
 *  PLASMA_sgemm - Performs one of the matrix-matrix operations
 *
 *    \f[ C = \alpha [op( A )\times op( B )] + \beta C \f],
 *
 *  where op( X ) is one of
 *
 *    op( X ) = X  or op( X ) = X' or op( X ) = g( X' )
 *
 *  alpha and beta are scalars, and A, B and C  are matrices, with op( A )
 *  an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
 *
 *******************************************************************************
 *
 * @param[in] transA
 *          Specifies whether the matrix A is transposed, not transposed or ugate transposed:
 *          = PlasmaNoTrans:   A is not transposed;
 *          = PlasmaTrans:     A is transposed;
 *          = PlasmaTrans: A is ugate transposed.
 *
 * @param[in] transB
 *          Specifies whether the matrix B is transposed, not transposed or ugate transposed:
 *          = PlasmaNoTrans:   B is not transposed;
 *          = PlasmaTrans:     B is transposed;
 *          = PlasmaTrans: B is ugate transposed.
 *
 * @param[in] M
 *          M specifies the number of rows of the matrix op( A ) and of the matrix C. M >= 0.
 *
 * @param[in] N
 *          N specifies the number of columns of the matrix op( B ) and of the matrix C. N >= 0.
 *
 * @param[in] K
 *          K specifies the number of columns of the matrix op( A ) and the number of rows of
 *          the matrix op( B ). K >= 0.
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha
 *
 * @param[in] A
 *          A is a LDA-by-ka matrix, where ka is K when  transA = PlasmaNoTrans,
 *          and is  M  otherwise.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,M).
 *
 * @param[in] B
 *          B is a LDB-by-kb matrix, where kb is N when  transB = PlasmaNoTrans,
 *          and is  K  otherwise.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,N).
 *
 * @param[in] beta
 *          beta specifies the scalar beta
 *
 * @param[in,out] C
 *          C is a LDC-by-N matrix.
 *          On exit, the array is overwritten by the M by N matrix ( alpha*op( A )*op( B ) + beta*C )
 *
 * @param[in] LDC
 *          The leading dimension of the array C. LDC >= max(1,M).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *
 *******************************************************************************
 *
 * @sa PLASMA_sgemm_Tile
 * @sa PLASMA_cgemm
 * @sa PLASMA_dgemm
 * @sa PLASMA_sgemm
 *
 ******************************************************************************/
int PLASMA_sgemm(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
                 float alpha, float *A, int LDA,
                 float *B, int LDB,
                 float beta,  float *C, int LDC)
{
    int NB;
    int Am, An, Bm, Bn;
    int status;
    PLASMA_desc descA, descB, descC;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_sgemm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaTrans)) {
        plasma_error("PLASMA_sgemm", "illegal value of transA");
        return -1;
    }
    if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaTrans)) {
        plasma_error("PLASMA_sgemm", "illegal value of transB");
        return -2;
    }
    if ( transA == PlasmaNoTrans ) {
        Am = M;
        An = K;
    } else {
        Am = K;
        An = M;
    }
    if ( transB == PlasmaNoTrans ) {
        Bm = K;
        Bn = N;
    } else {
        Bm = N;
        Bn = K;
    }
    if (M < 0) {
        plasma_error("PLASMA_sgemm", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_sgemm", "illegal value of N");
        return -4;
    }
    if (K < 0) {
        plasma_error("PLASMA_sgemm", "illegal value of N");
        return -5;
    }
    if (LDA < max(1, Am)) {
        plasma_error("PLASMA_sgemm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, Bm)) {
        plasma_error("PLASMA_sgemm", "illegal value of LDB");
        return -10;
    }
    if (LDC < max(1, M)) {
        plasma_error("PLASMA_sgemm", "illegal value of LDC");
        return -13;
    }

    /* Quick return */
    if (M == 0 || N == 0 ||
            ((alpha == (float)0.0 || K == 0) && beta == (float)1.0))
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_SGEMM, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_sgemm", "plasma_tune() failed");
        return status;
    }

    /* Set MT & NT & KT */
    NB = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_sooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
        plasma_sooplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
        plasma_sooplap2tile( descC, C, NB, NB, LDC, N,  0, 0, M,  N,  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
    } else {
Beispiel #26
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 * PLASMA_zgetri - Computes the inverse of a matrix using the LU factorization
 * computed by PLASMA_zgetrf.
 * This method inverts U and then computes inv(A) by solving the system
 * inv(A)*L = inv(U) for inv(A).
 *
 *******************************************************************************
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the triangular factor L or U from the
 *          factorization A = P*L*U as computed by PLASMA_zgetrf.
 *          On exit, if return value = 0, the inverse of the original
 *          matrix A.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in] IPIV
 *          The pivot indices that define the permutations
 *          as returned by PLASMA_zgetrf.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *          \retval >0 if i, the (i,i) element of the factor U is
 *                exactly zero; The matrix is singular
 *                and its inverse could not be computed.
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri_Tile
 * @sa PLASMA_zgetri_Tile_Async
 * @sa PLASMA_cgetri
 * @sa PLASMA_dgetri
 * @sa PLASMA_sgetri
 * @sa PLASMA_zgetrf
 *
 ******************************************************************************/
int PLASMA_zgetri(int N,
                  PLASMA_Complex64_t *A, int LDA,
                  int *IPIV)
{
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;
    PLASMA_desc descW;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_zgetri", "illegal value of N");
        return -1;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_zgetri", "illegal value of LDA");
        return -3;
    }
    /* Quick return */
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB   = PLASMA_NB;

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request,
                             plasma_desc_mat_free(&(descA)) );
    } else {
        plasma_ziplap2tile(  descA, A, NB, NB, LDA, N, 0, 0, N, N,
                            sequence, &request);
    }

    /* Allocate workspace */
    PLASMA_Alloc_Workspace_zgetri_Tile_Async(&descA, &descW);

    /* Call the tile interface */
    PLASMA_zgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request);
        plasma_dynamic_sync();
        plasma_desc_mat_free(&descA);
    } else {
        plasma_ziptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request);
        plasma_dynamic_sync();
    }
    plasma_desc_mat_free(&(descW));

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
Beispiel #27
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zgetri_Tile_Async - Computes the inverse of a matrix using the LU
 *  factorization computed by PLASMA_zgetrf.
 *  This method inverts U and then computes inv(A) by solving the system
 *  inv(A)*L = inv(U) for inv(A).
 *  Non-blocking equivalent of PLASMA_zgetri_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri
 * @sa PLASMA_zgetri_Tile
 * @sa PLASMA_cgetri_Tile_Async
 * @sa PLASMA_dgetri_Tile_Async
 * @sa PLASMA_sgetri_Tile_Async
 * @sa PLASMA_zgetrf_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zgetri_Tile_Async(PLASMA_desc *A, int *IPIV, PLASMA_desc *W,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA;
    PLASMA_desc descW;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile_Async", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri_Tile_Async", "invalid A descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    } else {
        descA = *A;
    }
    /* Check descriptors for correctness */
    if (plasma_desc_check(W) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri_Tile_Async", "invalid W descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    } else {
        descW = *W;
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_zgetri_Tile_Async", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return */
    if (max(descA.m, 0) == 0)
        return PLASMA_SUCCESS;

    plasma_dynamic_call_5(plasma_pztrtri,
        PLASMA_enum, PlasmaUpper,
        PLASMA_enum, PlasmaNonUnit,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    plasma_dynamic_call_9(plasma_pztrsmrv,
        PLASMA_enum, PlasmaRight,
        PLASMA_enum, PlasmaLower,
        PLASMA_enum, PlasmaNoTrans,
        PLASMA_enum, PlasmaUnit,
        PLASMA_Complex64_t, (PLASMA_Complex64_t) 1.0,
        PLASMA_desc, descA,
        PLASMA_desc, descW,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    /* No need for barrier tile2row because of previous dependencies */

    /* swap */
    plasma_dynamic_call_5(
        plasma_pzlaswpc,
        PLASMA_desc, descA,
        int *,       IPIV,
        int,         -1,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    plasma_dynamic_call_3(
        plasma_pzbarrier_row2tl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    return PLASMA_SUCCESS;
}
Beispiel #28
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t
 *
 *  PLASMA_cgelqs - Compute a minimum-norm solution min || A*X - B || using the LQ factorization
 *  A = L*Q computed by PLASMA_cgelqf.
 *
 *******************************************************************************
 *
 * @param[in] M
 *          The number of rows of the matrix A. M >= 0.
 *
 * @param[in] N
 *          The number of columns of the matrix A. N >= M >= 0.
 *
 * @param[in] NRHS
 *          The number of columns of B. NRHS >= 0.
 *
 * @param[in] A
 *          Details of the LQ factorization of the original matrix A as returned by PLASMA_cgelqf.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= M.
 *
 * @param[in] T
 *          Auxiliary factorization data, computed by PLASMA_cgelqf.
 *
 * @param[in,out] B
 *          On entry, the M-by-NRHS right hand side matrix B.
 *          On exit, the N-by-NRHS solution matrix X.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= N.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_cgelqs_Tile
 * @sa PLASMA_cgelqs_Tile_Async
 * @sa PLASMA_cgelqs
 * @sa PLASMA_dgelqs
 * @sa PLASMA_sgelqs
 * @sa PLASMA_cgelqf
 *
 ******************************************************************************/
int PLASMA_cgelqs(int M, int N, int NRHS,
                  PLASMA_Complex32_t *A, int LDA,
                  PLASMA_Complex32_t *T,
                  PLASMA_Complex32_t *B, int LDB)
{
    int NB, IB, IBNB, MT, NT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB, descT;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cgelqs", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (M < 0) {
        plasma_error("PLASMA_cgelqs", "illegal value of M");
        return -1;
    }
    if (N < 0 || M > N) {
        plasma_error("PLASMA_cgelqs", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_cgelqs", "illegal value of N");
        return -3;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_cgelqs", "illegal value of LDA");
        return -5;
    }
    if (LDB < max(1, max(1, N))) {
        plasma_error("PLASMA_cgelqs", "illegal value of LDB");
        return -8;
    }
    /* Quick return */
    if (min(M, min(N, NRHS)) == 0) {
        return PLASMA_SUCCESS;
    }

    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_FUNC_CGELS, M, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cgelqs", "plasma_tune() failed");
        return status;
    }

    /* Set MT, NT & NTRHS */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    MT    = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);

    plasma_sequence_create(plasma, &sequence);

    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        descT = plasma_desc_init(
                    PlasmaComplexFloat,
                    IB, NB, IBNB,
                    MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
    }
    else {
        /* Double the size of T to accomodate the tree reduction phase */
        descT = plasma_desc_init(
                    PlasmaComplexFloat,
                    IB, NB, IBNB,
                    MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
    }
    descT.mat = T;

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_cooplap2tile( descA, A, NB, NB, LDA, N,    0, 0, M, N   , plasma_desc_mat_free(&(descA)) );
        plasma_cooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {
Beispiel #29
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t
 *
 *  PLASMA_zgesvd - computes the singular value decomposition (SVD) of a complex
 *  M-by-N matrix A, optionally computing the left and/or right singular
 *  vectors. The SVD is written
 *
 *       A = U * SIGMA * transpose(V)
 *
 *  where SIGMA is an M-by-N matrix which is zero except for its
 *  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
 *  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
 *  are the singular values of A; they are real and non-negative, and
 *  are returned in descending order.  The first min(m,n) columns of
 *  U and V are the left and right singular vectors of A.
 *
 *  Note that the routine returns V**T, not V.
 *  Not LAPACK Compliant for now!
 *  Note: Only PlasmaNoVec supported!
 *******************************************************************************
 *
 * @param[in] jobu
 *          Specifies options for computing all or part of the matrix U.
 *          Intended usage:
 *          = PlasmaVec: all M columns of U are returned in array U;
 *          = PlasmaNoVec: no columns of U (no left singular vectors) are
 *                     computed.
 *          Note: Only PlasmaNoVec supported!
 *
 * @param[in] jobvt
 *          Specifies options for computing all or part of the matrix V**H.
 *          Intended usage:
 *          = PlasmaVec: all M columns of U are returned in array U;
 *          = PlasmaNoVec: no columns of U (no left singular vectors) are
 *                     computed.
 *          Note: Only PlasmaNoVec supported!
 *
 * @param[in] M
 *          The number of rows of the matrix A. M >= 0.
 *
 * @param[in] N
 *          The number of columns of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix A.
 *          On exit,
 *          if JOBU = 'O',  A is overwritten with the first min(m,n)
 *                          columns of U (the left singular vectors,
 *                          stored columnwise);
 *          if JOBVT = 'O', A is overwritten with the first min(m,n)
 *                          rows of V**H (the right singular vectors,
 *                          stored rowwise);
 *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
 *                          are destroyed.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,M).
 *
 * @param[out] S
 *          The double precision singular values of A, sorted so that S(i) >= S(i+1).
 *
 * @param[out] U
 *          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
 *          If JOBU = 'A', U contains the M-by-M unitary matrix U;
 *          if JOBU = 'S', U contains the first min(m,n) columns of U
 *          (the left singular vectors, stored columnwise);
 *          if JOBU = 'N' or 'O', U is not referenced.
 *
 * @param[in] LDU
 *          The leading dimension of the array U.  LDU >= 1; if
 *          JOBU = 'S' or 'A', LDU >= M.
 *
 * @param[out] VT
 *         If JOBVT = 'A', VT contains the N-by-N unitary matrix
 *         V**H;
 *         if JOBVT = 'S', VT contains the first min(m,n) rows of
 *         V**H (the right singular vectors, stored rowwise);
 *         if JOBVT = 'N' or 'O', VT is not referenced.
 *
 * @param[in] LDVT
 *         The leading dimension of the array VT.  LDVT >= 1; if
 *         JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
 *
 * @param[in, out] descT
 *          On entry, descriptor as return by PLASMA_Alloc_Workspace_zgesvd
 *          On exit, contains auxiliary factorization data.
 *
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgesvd_Tile
 * @sa PLASMA_zgesvd_Tile_Async
 * @sa PLASMA_cgesvd
 * @sa PLASMA_dgesvd
 * @sa PLASMA_sgesvd
 *
 ******************************************************************************/
int PLASMA_zgesvd(PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N,
                  PLASMA_Complex64_t *A, int LDA,
                  double *S,
                  PLASMA_Complex64_t *U, int LDU,
                  PLASMA_Complex64_t *VT, int LDVT,
                  PLASMA_desc *descT)
{
    int NB, IB, IBNB, minMN, MT, NT, minMTNT;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descU, descVT;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgesvd", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    
    /* Tune NB & IB depending on M & N; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_ZGESVD, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgesvd", "plasma_tune() failed");
        return status;
    }

    /* Set MT, NT */
    NB    = PLASMA_NB;
    IB    = PLASMA_IB;
    IBNB  = IB*NB;
    MT    = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT    = (N%NB==0) ? (N/NB) : (N/NB+1);
    minMN = min(M,N);
    minMTNT = min(MT,NT);

    /* Check input arguments */
    if (jobu != PlasmaNoVec  && jobu !=PlasmaVec) {
        plasma_error("PLASMA_zgesvd", "illegal value of jobu");
        return -1;
    }
    if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
        plasma_error("PLASMA_zgesvd", "illegal value of jobvt");
        return -2;
    }
    if (M < 0) {
        plasma_error("PLASMA_zgesvd", "illegal value of M");
        return -3;
    }
    if (N < 0) {
        plasma_error("PLASMA_zgesvd", "illegal value of N");
        return -4;
    }
    if (LDA < max(1, M)) {
        plasma_error("PLASMA_zgesvd", "illegal value of LDA");
        return -6;
    }
    if (LDU < 1) {
        plasma_error("PLASMA_zgesvd", "illegal value of LDU");
        return -9;
    }
    if (LDVT < 1) {
        plasma_error("PLASMA_zgesvd", "illegal value of LDVT");
        return -11;
    }
    if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) || 
         ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
        plasma_error("PLASMA_zgesvd", "invalid T descriptor");
        return -12;
    }
    /* Quick return */
    if (min(M, N) == 0) {
        return PLASMA_SUCCESS;
    }

    if (jobu == PlasmaVec) {
        plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version");
        return -1;
    }
    if (jobvt == PlasmaVec) {
        plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version");
        return -2;
    }

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_zooplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
        if (jobu == PlasmaVec){
            plasma_zooplap2tile( descU,   U, NB, NB,  LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
        }
Beispiel #30
0
/***************************************************************************//**
 *
 * @ingroup double
 *
 *  PLASMA_dtrmm - Computes B = alpha*op( A )*B or B = alpha*B*op( A ).
 *
 *******************************************************************************
 *
 * @param[in] side
 *          Specifies whether A appears on the left or on the right of X:
 *          = PlasmaLeft:  A*X = B
 *          = PlasmaRight: X*A = B
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or lower triangular:
 *          = PlasmaUpper: Upper triangle of A is stored;
 *          = PlasmaLower: Lower triangle of A is stored.
 *
 * @param[in] transA
 *          Specifies whether the matrix A is transposed, not transposed or ugate transposed:
 *          = PlasmaNoTrans:   A is transposed;
 *          = PlasmaTrans:     A is not transposed;
 *          = PlasmaTrans: A is ugate transposed.
 *
 * @param[in] diag
 *          Specifies whether or not A is unit triangular:
 *          = PlasmaNonUnit: A is non unit;
 *          = PlasmaUnit:    A us unit.
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in] NRHS
 *          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
 *
 * @param[in] alpha
 *          alpha specifies the scalar alpha.
 *
 * @param[in] A
 *          The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular
 *          part of the array A contains the upper triangular matrix, and the strictly lower
 *          triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N
 *          lower triangular part of the array A contains the lower triangular matrix, and the
 *          strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the
 *          diagonal elements of A are also not referenced and are assumed to be 1.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[in,out] B
 *          On entry, the N-by-NRHS right hand side matrix B.
 *          On exit, if return value = 0, the N-by-NRHS solution matrix X.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa PLASMA_dtrmm_Tile
 * @sa PLASMA_dtrmm_Tile_Async
 * @sa PLASMA_ctrmm
 * @sa PLASMA_dtrmm
 * @sa PLASMA_strmm
 *
 ******************************************************************************/
int PLASMA_dtrmm(PLASMA_enum side, PLASMA_enum uplo,
                 PLASMA_enum transA, PLASMA_enum diag,
                 int N, int NRHS, double alpha,
                 double *A, int LDA,
                 double *B, int LDB)
{
    int NB, NA;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA, descB;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dtrmm", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Check input arguments */
    if (side != PlasmaLeft && side != PlasmaRight) {
        plasma_error("PLASMA_dtrmm", "illegal value of side");
        return -1;
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_dtrmm", "illegal value of uplo");
        return -2;
    }
    if (transA != PlasmaTrans && transA != PlasmaNoTrans && transA != PlasmaTrans ) {
        plasma_error("PLASMA_dtrmm", "illegal value of transA");
        return -3;
    }
    if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
        plasma_error("PLASMA_dtrmm", "illegal value of diag");
        return -4;
    }
    if (N < 0) {
        plasma_error("PLASMA_dtrmm", "illegal value of N");
        return -5;
    }
    if (NRHS < 0) {
        plasma_error("PLASMA_dtrmm", "illegal value of NRHS");
        return -6;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_dtrmm", "illegal value of LDA");
        return -8;
    }
    if (LDB < max(1, N)) {
        plasma_error("PLASMA_dtrmm", "illegal value of LDB");
        return -10;
    }
    /* Quick return */
    if (min(N, NRHS) == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dtrmm", "plasma_tune() failed");
        return status;
    }

    /* Set NT & NTRHS */
    NB = PLASMA_NB;
    if (side == PlasmaLeft) {
      NA = N;
    } else {
      NA = NRHS;
    }

    plasma_sequence_create(plasma, &sequence);

    if ( PLASMA_TRANSLATION == PLASMA_OUTOFPLACE ) {
        plasma_dooplap2tile( descA, A, NB, NB, LDA, NA,   0, 0, NA, NA,   plasma_desc_mat_free(&(descA)) );
        plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N,  NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
    } else {