예제 #1
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zlaswp_Tile_Async - performs a series of row interchanges
 *  on the matrix A.  One row interchange is initiated for each of
 *  rows K1 through K2 of A.
 *  Non-blocking equivalent of PLASMA_zlaswp_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlaswp
 * @sa PLASMA_zlaswp_Tile
 * @sa PLASMA_claswp_Tile_Async
 * @sa PLASMA_dlaswp_Tile_Async
 * @sa PLASMA_slaswp_Tile_Async
 * @sa PLASMA_zgetrf_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zlaswp_Tile_Async(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    if ( (K1 != 1) || (K2 != descA.m) ) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid K1 or K2 (1..M is the only interval supported right now)");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    plasma_dynamic_call_3(
        plasma_pzbarrier_tl2pnl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    /* swap */
    plasma_dynamic_call_5(
        plasma_pzlaswp,
        PLASMA_desc, descA,
        int *,       IPIV,
        int,         INCX,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);
    
    plasma_dynamic_call_3(
        plasma_pzbarrier_pnl2tl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    return PLASMA_SUCCESS;
}
예제 #2
0
/***************************************************************************//**
 *
 * @ingroup double_Tile_Async
 *
 *  PLASMA_dsygst_Tile_Async - reduces a complex Hermitian-definite
 *  generalized eigenproblem to standard form.
 *  If PlasmaItype == 1, the problem is A*x = lambda*B*x, and A is
 *  overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
 *  If PlasmaItype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x
 *  = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.  B must
 *  have been previously factorized as U**T*U or L*L**T by
 *  PLASMA_DPOTRF.
 *  ONLY PlasmaItype == 1 and PlasmaLower supported!
 *  Non-blocking equivalent of PLASMA_dsygst_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_dsygst
 * @sa PLASMA_dsygst_Tile
 * @sa PLASMA_chegst_Tile_Async
 * @sa PLASMA_dsygst_Tile_Async
 * @sa PLASMA_ssygst_Tile_Async
 * @sa PLASMA_dsygv_Tile_Async
 *
 ******************************************************************************/
int PLASMA_dsygst_Tile_Async(PLASMA_enum itype, PLASMA_enum uplo, 
                             PLASMA_desc *A, 
                             PLASMA_desc *B,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA = *A;
    PLASMA_desc descB = *B;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_dsygst_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    
    /* 
     * Transform Hermitian-definite generalized eigenproblem 
     * to standard form
     */
    plasma_dynamic_call_6(plasma_pdsygst,
        PLASMA_enum, itype,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        PLASMA_desc, descB,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
예제 #3
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex32_t_Tile_Async
 *
 *  PLASMA_cpotrf_Tile_Async - Computes the Cholesky factorization of a symmetric
 *  positive definite or Hermitian positive definite matrix.
 *  Non-blocking equivalent of PLASMA_cpotrf_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_cpotrf
 * @sa PLASMA_cpotrf_Tile
 * @sa PLASMA_cpotrf_Tile_Async
 * @sa PLASMA_dpotrf_Tile_Async
 * @sa PLASMA_spotrf_Tile_Async
 * @sa PLASMA_cpotrs_Tile_Async
 *
 ******************************************************************************/
int PLASMA_cpotrf_Tile_Async(PLASMA_enum uplo, PLASMA_desc *A,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cpotrf_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_cpotrf_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (uplo != PlasmaUpper && uplo != PlasmaLower) {
        plasma_error("PLASMA_cpotrf_Tile", "illegal value of uplo");
        return plasma_request_fail(sequence, request, -1);
    }
    /* Quick return */
/*
    if (max(N, 0) == 0)
        return PLASMA_SUCCESS;
*/
    plasma_parallel_call_4(plasma_pcpotrf,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
예제 #4
0
/***************************************************************************//**
 *
 * @ingroup float_Tile_Async
 *
 *  PLASMA_splgsy_Tile_Async - Generate a random hermitian matrix by tiles.
 *  Non-blocking equivalent of PLASMA_splgsy_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_splgsy
 * @sa PLASMA_splgsy_Tile
 * @sa PLASMA_cplgsy_Tile_Async
 * @sa PLASMA_dplgsy_Tile_Async
 * @sa PLASMA_splgsy_Tile_Async
 * @sa PLASMA_splgsy_Tile_Async
 * @sa PLASMA_splgsy_Tile_Async
 *
 ******************************************************************************/
int PLASMA_splgsy_Tile_Async( float          bump,
                              PLASMA_desc     *A,
                              unsigned long long int seed,
                              PLASMA_sequence *sequence, 
                              PLASMA_request  *request)
{
    PLASMA_desc descA = *A;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_splgsy_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_splgsy_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Quick return */
    if (min( descA.m, descA.n ) == 0)
        return PLASMA_SUCCESS;

    plasma_parallel_call_5(plasma_psplgsy,
        float, bump,
        PLASMA_desc,        descA,
        unsigned long long int, seed,
        PLASMA_sequence*,   sequence,
        PLASMA_request*,    request);

    return PLASMA_SUCCESS;
}
예제 #5
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zgetrf_nopiv_Tile_Async - Computes the tile LU factorization of a
 *  matrix.  Non-blocking equivalent of PLASMA_zgetrf_nopiv_Tile().  May return
 *  before the computation is finished.  Allows for pipelining of operations ar
 *  runtime.
 *
 *******************************************************************************
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix to be factored.
 *          On exit, the tile factors L and U from the factorization.
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetrf_nopiv
 * @sa PLASMA_zgetrf_nopiv_Tile
 * @sa PLASMA_cgetrf_nopiv_Tile_Async
 * @sa PLASMA_dgetrf_nopiv_Tile_Async
 * @sa PLASMA_sgetrf_nopiv_Tile_Async
 * @sa PLASMA_zgetrs_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zgetrf_nopiv_Tile_Async(PLASMA_desc *A,
                                   PLASMA_sequence *sequence,
                                   PLASMA_request *request)
{
    PLASMA_desc descA;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zgetrf_nopiv_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetrf_nopiv_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    } else {
        descA = *A;
    }

    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_zgetrf_nopiv_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    plasma_dynamic_call_3(plasma_pzgetrf_nopiv,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
예제 #6
0
/***************************************************************************//**
 *
 **/
void plasma_pdlag2s(plasma_context_t *plasma)
{
    PLASMA_desc A;
    PLASMA_desc SB;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int X, Y;
    int m, n;
    int next_m;
    int next_n;
    int ldam, ldbm;
    int info = PLASMA_SUCCESS;

    plasma_unpack_args_4(A, SB, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        next_m = m;
        next_n = n;

        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m-A.mt;
        }

        X = m == A.mt-1 ? A.m-A.mb*m : A.nb;
        Y = n == A.nt-1 ? A.n-A.nb*n : A.nb;
        ldam = BLKLDD(A, m);
        ldbm = BLKLDD(SB, m);
        CORE_dlag2s(X, Y, A(m, n), ldam, SB(m, n), ldbm, &info);

        if (info != 0)
            plasma_request_fail(sequence, request, info);

        m = next_m;
        n = next_n;
    }
}
예제 #7
0
/***************************************************************************//**
 *
 * @ingroup float_Tile_Async
 *
 *  PLASMA_ssytrd_Tile_Async - Computes all eigenvalues and,
 *  optionally, eigenvectors of a complex Hermitian matrix A using a
 *  two-stage approach:
 *  First stage: reduction to band tridiagonal form;
 *  Second stage: reduction from band to tridiagonal form.
 *
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_ssytrd
 * @sa PLASMA_ssytrd_Tile
 * @sa PLASMA_chetrd_Tile_Async
 * @sa PLASMA_dsytrd_Tile_Async
 * @sa PLASMA_ssytrd_Tile_Async
 *
 ******************************************************************************/
int PLASMA_ssytrd_Tile_Async(PLASMA_enum jobz, PLASMA_enum uplo,
                            PLASMA_desc *A,
                            float *D,
                            float *E,
                            PLASMA_desc *T,
                            PLASMA_desc *Q,
                            PLASMA_sequence *sequence, PLASMA_request *request)
{
    int NB, IB, IBNB, NT;
    PLASMA_desc descA = *A;
    PLASMA_desc descT = *T;

    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_ssytrd_Tile_Async", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Set NT & NTRHS */
    NB   = PLASMA_NB;
    IB   = PLASMA_IB;
    IBNB = IB*NB;
    NT   = (descA.ln%NB==0) ? (descA.ln/NB) : (descA.ln/NB+1);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ( (jobz == PlasmaVec) && (plasma_desc_check(Q) != PLASMA_SUCCESS) ) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Check input arguments */
    if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "illegal value of jobz");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (descA.m != descA.n) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "matrix need to be square");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
   if (descA.nb != descA.mb) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (jobz == PlasmaVec) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "computing the eigenvectors is not supported in this version");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ( (jobz == PlasmaVec) && (Q->nb != Q->mb) ) {
        plasma_error("PLASMA_ssytrd_Tile_Async", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Reduction to tridiagonal form
     * with a two-stage approach.
     */

    /* Reduction to BAND tridiagonal form
     */
    plasma_dynamic_call_5(plasma_pssyrbt,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        PLASMA_desc, descT,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    /* 
     * Build the Q of the first stage
     */
    /* if (jobz == PlasmaVec){ */
    /*    /\* Initialize Q to Identity *\/ */
    /*    plasma_dynamic_call_6(plasma_pslaset, */
    /*        PLASMA_enum, PlasmaUpperLower, */
    /*        float, 0.0, */
    /*        float, 1.0, */
    /*        PLASMA_desc, descQ, */
    /*        PLASMA_sequence*, sequence, */
    /*        PLASMA_request*, request); */
    /*    /\* Accumulate the transformations from the first stage*\/ */
    /*    plasma_dynamic_call_6(plasma_psorgtr, */
    /*        PLASMA_enum, uplo, */
    /*        PLASMA_desc, descA, */
    /*        PLASMA_desc, descQ, */
    /*        PLASMA_desc, descT, */
    /*        PLASMA_sequence*, sequence, */
    /*        PLASMA_request*, request); */
    /* } */

    /* Set the V's to zero before the 2nd stage (bulge chasing) */
    /*
    */
    plasma_dynamic_call_5(plasma_pslaset2,
        PLASMA_enum, uplo,
        float, 0.0,
        PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) 
                                         : plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    /* Reduction from BAND tridiagonal to the final condensed form
     */
    plasma_dynamic_call_7(plasma_pssbrdt,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        float*, D,
        float*, E,
        PLASMA_desc, descT,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);


    return PLASMA_SUCCESS;
}
예제 #8
0
/***************************************************************************//**
 *  Parallel tile LU factorization - static scheduling
 **/
void plasma_pzgetrf_incpiv(plasma_context_t *plasma)
{
    PLASMA_desc A;
    PLASMA_desc L;
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int k, m, n;
    int next_k;
    int next_m;
    int next_n;
    int ldak, ldam;
    int info;
    int tempkn, tempkm, tempmm, tempnn;
    int ib = PLASMA_IB;
    PLASMA_Complex64_t *work;

    plasma_unpack_args_5(A, L, IPIV, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;
    work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*L.nb, L.dtyp);
    ss_init(A.mt, A.nt, -1);

    k = 0;
    n = PLASMA_RANK;
    while (n >= A.nt) {
        k++;
        n = n-A.nt+k;
    }
    m = k;

    while (k < min(A.mt, A.nt) && n < A.nt && !ss_aborted()) {
        next_n = n;
        next_m = m;
        next_k = k;

        next_m++;
        if (next_m == A.mt) {
            next_n += PLASMA_SIZE;
            while (next_n >= A.nt && next_k < min(A.mt, A.nt)) {
                next_k++;
                next_n = next_n-A.nt+next_k;
            }
            next_m = next_k;
        }

        tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;

        ldak = BLKLDD(A, k);
        ldam = BLKLDD(A, m);

        if (n == k) {
            if (m == k) {
                ss_cond_wait(k, k, k-1);
                CORE_zgetrf_incpiv(
                    tempkm, tempkn, ib,
                    A(k, k), ldak,
                    IPIV(k, k), &info);
                if (info != 0 && m == A.mt-1) {
                    plasma_request_fail(sequence, request, info + A.nb*k);
                    ss_abort();
                }
                ss_cond_set(k, k, k);
            }
            else {
                ss_cond_wait(m, k, k-1);
                CORE_ztstrf(
                    tempmm, tempkn, ib, A.nb,
                    A(k, k), ldak,
                    A(m, k), ldam,
                    L(m, k), L.mb,
                    IPIV(m, k),
                    work, L.nb, &info);
                if (info != 0 && m == A.mt-1) {
                    plasma_request_fail(sequence, request, info + A.nb*k);
                    ss_abort();
                }
                ss_cond_set(m, k, k);
            }
        }
        else {
            if (m == k) {
                ss_cond_wait(k, k, k);
                ss_cond_wait(k, n, k-1);
                CORE_zgessm(
                    tempkm, tempnn, tempkm, ib,
                    IPIV(k, k),
                    A(k, k), ldak,
                    A(k, n), ldak);
            }
            else {
                ss_cond_wait(m, k, k);
                ss_cond_wait(m, n, k-1);
                CORE_zssssm(
                    A.nb, tempnn, tempmm, tempnn, A.nb, ib,
                    A(k, n), ldak,
                    A(m, n), ldam,
                    L(m, k), L.mb,
                    A(m, k), ldam,
                    IPIV(m, k));
                ss_cond_set(m, n, k);
            }
        }
        n = next_n;
        m = next_m;
        k = next_k;
    }
    plasma_private_free(plasma, work);
    ss_finalize();
}
예제 #9
0
/***************************************************************************//**
 *  Parallel application of Q using tile V - LQ factorization - static scheduling
 **/
void plasma_pzunmlq(plasma_context_t *plasma)
{
    PLASMA_enum side;
    PLASMA_enum trans;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_desc T;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int k, m, n;
    int next_k;
    int next_m;
    int next_n;
    int ldak, ldbk, ldbm;
    int tempmm, tempnn, tempkm, tempkmin;
    int minMT, minM;
    int ib = PLASMA_IB;
    PLASMA_Complex64_t *work;

    plasma_unpack_args_7(side, trans, A, B, T, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    if (side != PlasmaLeft) {
        plasma_request_fail(sequence, request, PLASMA_ERR_NOT_SUPPORTED);
        return;
    }
    if (trans != PlasmaConjTrans) {
        plasma_request_fail(sequence, request, PLASMA_ERR_NOT_SUPPORTED);
        return;
    }

    work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp);
    ss_init(B.mt, B.nt, min(A.mt, A.nt));

    if (A.m > A.n) {
        minM  = A.n;
        minMT = A.nt;
    } else {
        minM  = A.m;
        minMT = A.mt;
    }

    k = minMT-1;
    n = PLASMA_RANK;
    while (n >= B.nt) {
        k--;
        n = n-B.nt;
    }
    m = B.mt-1;

    while (k >= 0 && n < B.nt) {
        next_n = n;
        next_m = m;
        next_k = k;

        next_m--;
        if (next_m == k-1) {
            next_n += PLASMA_SIZE;
            while (next_n >= B.nt && next_k >= 0) {
                next_k--;
                next_n = next_n-B.nt;
            }
            next_m = B.mt-1;
        }

        tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
        tempkm   = k == B.mt-1 ? B.m-k*B.mb : B.mb;
        tempnn   = n == B.nt-1 ? B.n-n*B.nb : B.nb;
        tempmm   = m == B.mt-1 ? B.m-m*B.mb : B.mb;

        ldak = BLKLDD(A, k);
        ldbk = BLKLDD(B, k);
        ldbm = BLKLDD(B, m);

        if (m == k) {
            CORE_zunmlq(
                    side, trans,
                    tempkm, tempnn, tempkmin, ib,
                    A(k, k), ldak,
                    T(k, k), T.mb,
                    B(k, n), ldbk,
                    work, T.nb);
            ss_cond_set(k, n, k);
        }
        else {
            ss_cond_wait(m, n, k+1);
            CORE_ztsmlq(
                    side, trans,
                    A.mb, tempnn, tempmm, tempnn, tempkmin, ib,
                    B(k, n), ldbk,
                    B(m, n), ldbm,
                    A(k, m), ldak,
                    T(k, m), T.mb,
                    work, ib);
            ss_cond_set(m, n, k);
        }
        m = next_m;
        n = next_n;
        k = next_k;
    }
    plasma_private_free(plasma, work);
    ss_finalize();
}
예제 #10
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zgetri_Tile_Async - Computes the inverse of a matrix using the LU
 *  factorization computed by PLASMA_zgetrf.
 *  This method inverts U and then computes inv(A) by solving the system
 *  inv(A)*L = inv(U) for inv(A).
 *  Non-blocking equivalent of PLASMA_zgetri_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_zgetri
 * @sa PLASMA_zgetri_Tile
 * @sa PLASMA_cgetri_Tile_Async
 * @sa PLASMA_dgetri_Tile_Async
 * @sa PLASMA_sgetri_Tile_Async
 * @sa PLASMA_zgetrf_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zgetri_Tile_Async(PLASMA_desc *A, int *IPIV, PLASMA_desc *W,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA;
    PLASMA_desc descW;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile_Async", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile_Async", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri_Tile_Async", "invalid A descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    } else {
        descA = *A;
    }
    /* Check descriptors for correctness */
    if (plasma_desc_check(W) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zgetri_Tile_Async", "invalid W descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    } else {
        descW = *W;
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_zgetri_Tile_Async", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return */
    if (max(descA.m, 0) == 0)
        return PLASMA_SUCCESS;

    plasma_dynamic_call_5(plasma_pztrtri,
        PLASMA_enum, PlasmaUpper,
        PLASMA_enum, PlasmaNonUnit,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    plasma_dynamic_call_9(plasma_pztrsmrv,
        PLASMA_enum, PlasmaRight,
        PLASMA_enum, PlasmaLower,
        PLASMA_enum, PlasmaNoTrans,
        PLASMA_enum, PlasmaUnit,
        PLASMA_Complex64_t, (PLASMA_Complex64_t) 1.0,
        PLASMA_desc, descA,
        PLASMA_desc, descW,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    /* No need for barrier tile2row because of previous dependencies */

    /* swap */
    plasma_dynamic_call_5(
        plasma_pzlaswpc,
        PLASMA_desc, descA,
        int *,       IPIV,
        int,         -1,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    plasma_dynamic_call_3(
        plasma_pzbarrier_row2tl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*,  request);

    return PLASMA_SUCCESS;
}
예제 #11
0
/***************************************************************************//**
 *
 * @ingroup PLASMA_Complex64_t_Tile_Async
 *
 *  PLASMA_zlansy_Tile_Async - Non-blocking equivalent of PLASMA_zlansy_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_zlansy
 * @sa PLASMA_zlansy_Tile
 * @sa PLASMA_clansy_Tile_Async
 * @sa PLASMA_dlansy_Tile_Async
 * @sa PLASMA_slansy_Tile_Async
 *
 ******************************************************************************/
int PLASMA_zlansy_Tile_Async(PLASMA_enum norm, PLASMA_enum uplo, PLASMA_desc *A, double *value,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA;
    double *work = NULL;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlansy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zlansy_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zlansy_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(A) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlansy_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    } else {
        descA = *A;
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_zlansy_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ( (norm != PlasmaMaxNorm) && (norm != PlasmaOneNorm)
         && (norm != PlasmaInfNorm) && (norm != PlasmaFrobeniusNorm) ) {
        plasma_error("PLASMA_zlansy_Tile", "illegal value of norm");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if ( (uplo != PlasmaUpper) && (uplo != PlasmaLower) ) {
        plasma_error("PLASMA_zlansy_Tile", "illegal value of uplo");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return */
    if ( descA.m == 0) {
        *value = 0.0;
        return PLASMA_SUCCESS;
    }

    if (PLASMA_SCHEDULING == PLASMA_STATIC_SCHEDULING) {
        if (norm == PlasmaFrobeniusNorm) {
            work = plasma_shared_alloc(plasma, 2*PLASMA_SIZE, PlasmaRealDouble );
        } else {
            work = plasma_shared_alloc(plasma,   PLASMA_SIZE, PlasmaRealDouble );
        }
    }

    plasma_parallel_call_7(plasma_pzlansy,
        PLASMA_enum, norm,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        double*, work,
        double*, value,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    if (work != NULL)
        plasma_shared_free( plasma, work );

    return PLASMA_SUCCESS;
}
예제 #12
0
/** ****************************************************************************
 *
 * @ingroup InPlaceTransformation
 *
 *  plasma_dgetmi2 Implementation of inplace transposition
 *    based on the GKK algorithm by Gustavson, Karlsson, Kagstrom.
 *    This algorithm shift some cycles to transpose the matrix.
 *
 *******************************************************************************
 *
 * @param[in] m
 *         Number of rows of matrix A
 *
 * @param[in] n
 *         Number of columns of matrix A
 *
 * @param[in,out] A
 *         Matrix of size L*m*n
 *
 * @param[in] nprob
 *         Number of parallel and independant problems
 *
 * @param[in] me
 *         Number of rows of the problem
 *
 * @param[in] ne
 *         Number of columns in the problem
 *
 * @param[in] L
 *         Size of chunk to use for transformation
 *
 ******************************************************************************/
int plasma_dshift(plasma_context_t *plasma, int m, int n, double *A,
                  int nprob, int me, int ne, int L,
                  PLASMA_sequence *sequence, PLASMA_request *request) 
{
    int *leaders = NULL;
    int ngrp, thrdbypb, thrdtot, nleaders;

    /* Check Plasma context */
    thrdtot  = PLASMA_SIZE;
    thrdbypb = PLASMA_GRPSIZE;
    ngrp = thrdtot/thrdbypb;

    /* check input */
    if( (nprob * me * ne * L) != (m * n) ) {
        plasma_error(__func__, "problem size does not match matrix size");
        /*printf("m=%d,  n=%d,  nprob=%d,  me=%d,  ne=%d, L=%d\n", m, n, nprob, me, ne, L);*/
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if( thrdbypb > thrdtot ) {
        plasma_error(__func__, "number of thread per problem must be less or equal to total number of threads");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if( (thrdtot % thrdbypb) != 0 ) {
        plasma_error(__func__, "number of thread per problem must divide the total number of thread");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* quick return */
    if( (me < 2) || (ne < 2) || (nprob < 1) ) {
        return PLASMA_SUCCESS;
    }

    GKK_getLeaderNbr(me, ne, &nleaders, &leaders);
    nleaders *= 3;

    if (PLASMA_SCHEDULING == PLASMA_STATIC_SCHEDULING) {
        int *Tp      = NULL;
        int i, ipb;
        int q, owner;

        q = me*ne - 1;
    
        Tp = (int *)plasma_shared_alloc(plasma, thrdtot, PlasmaInteger);
        for (i=0; i<thrdtot; i++)
            Tp[i] = 0;

        ipb = 0;
        
        /* First part with coarse parallelism */
        if (nprob > ngrp) {
            ipb = (nprob / ngrp)*ngrp;
        
            /* loop over leader */
            if (thrdbypb > 1) {
                for (i=0; i<nleaders; i+=3) {
                    /* assign this cycle to a thread */
                    owner = minloc(thrdbypb, Tp);
                
                    /* assign it to owner */
                    Tp[owner] = Tp[owner] + leaders[i+1] * L;
                    leaders[i+2] = owner;
                }
            
                GKK_BalanceLoad(thrdbypb, Tp, leaders, nleaders, L);
            }
            else {
                for (i=0; i<nleaders; i+=3) {
                    Tp[0] = Tp[0] + leaders[i+1] * L;
                    leaders[i+2] = 0;
                }
            }

            /* shift in parallel */
            for (i=0; i< (nprob/ngrp); i++) {
                plasma_static_call_9(plasma_pdshift,
                                     int,                 me,
                                     int,                 ne,
                                     int,                 L,
                                     double*, &(A[i*ngrp*me*ne*L]),
                                     int *,               leaders,
                                     int,                 nleaders,
                                     int,                 thrdbypb,
                                     PLASMA_sequence*,    sequence,
                                     PLASMA_request*,     request);
            }
        }
예제 #13
0
/***************************************************************************//**
 *  Parallel tile Cholesky factorization - static scheduling
 **/
void plasma_pspotrf(plasma_context_t *plasma)
{
    PLASMA_enum uplo;
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int k, m, n;
    int next_k;
    int next_m;
    int next_n;
    int ldak, ldam, ldan;
    int info;
    int tempkn, tempmn;

    float zone  = (float) 1.0;
    float mzone = (float)-1.0;

    plasma_unpack_args_4(uplo, A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;
    ss_init(A.nt, A.nt, 0);

    k = 0;
    m = PLASMA_RANK;
    while (m >= A.nt) {
        k++;
        m = m-A.nt+k;
    }
    n = 0;

    while (k < A.nt && m < A.nt && !ss_aborted()) {
        next_n = n;
        next_m = m;
        next_k = k;

        next_n++;
        if (next_n > next_k) {
            next_m += PLASMA_SIZE;
            while (next_m >= A.nt && next_k < A.nt) {
                next_k++;
                next_m = next_m-A.nt+next_k;
            }
            next_n = 0;
        }

        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        tempmn = m == A.nt-1 ? A.n-m*A.nb : A.nb;

        ldak = BLKLDD(A, k);
        ldan = BLKLDD(A, n);
        ldam = BLKLDD(A, m);

        if (m == k) {
            if (n == k) {
                /*
                 *  PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    CORE_spotrf(
                        PlasmaLower,
                        tempkn,
                        A(k, k), ldak,
                        &info);
                }
                /*
                 *  PlasmaUpper
                 */
                else {
                    CORE_spotrf(
                        PlasmaUpper,
                        tempkn,
                        A(k, k), ldak,
                        &info);
                }
                if (info != 0) {
                    plasma_request_fail(sequence, request, info + A.nb*k);
                    ss_abort();
                }
                ss_cond_set(k, k, 1);
            }
            else {
                ss_cond_wait(k, n, 1);
                /*
                 *  PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    CORE_ssyrk(
                         PlasmaLower, PlasmaNoTrans,
                         tempkn, A.nb,
                         -1.0, A(k, n), ldak,
                          1.0, A(k, k), ldak);
                }
                /*
                 *  PlasmaUpper
                 */
                else {
                    CORE_ssyrk(
                         PlasmaUpper, PlasmaTrans,
                         tempkn, A.nb,
                         -1.0, A(n, k), ldan,
                          1.0, A(k, k), ldak);
                }
            }
        }
        else {
            if (n == k) {
                ss_cond_wait(k, k, 1);
                /*
                 *  PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    CORE_strsm(
                        PlasmaRight, PlasmaLower, PlasmaTrans, PlasmaNonUnit,
                        tempmn, A.nb,
                        zone, A(k, k), ldak,
                              A(m, k), ldam);
                }
                /*
                 *  PlasmaUpper
                 */
                else {
                    CORE_strsm(
                        PlasmaLeft, PlasmaUpper, PlasmaTrans, PlasmaNonUnit,
                        A.nb, tempmn,
                        zone, A(k, k), ldak,
                              A(k, m), ldak);
                }
                ss_cond_set(m, k, 1);
            }
            else {
                ss_cond_wait(k, n, 1);
                ss_cond_wait(m, n, 1);
                /*
                 *  PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    CORE_sgemm(
                        PlasmaNoTrans, PlasmaTrans,
                        tempmn, A.nb, A.nb,
                        mzone, A(m, n), ldam,
                               A(k, n), ldak,
                         zone, A(m, k), ldam);
                }
                /*
                 *  PlasmaUpper
                 */
                else {
                    CORE_sgemm(
                        PlasmaTrans, PlasmaNoTrans,
                        A.nb, tempmn, A.nb,
                        mzone, A(n, k), ldan,
                               A(n, m), ldan,
                         zone, A(k, m), ldak);
                }
            }
        }
        n = next_n;
        m = next_m;
        k = next_k;
    }
    ss_finalize();
}
예제 #14
0
/***************************************************************************//**
 *
 * @ingroup float_Tile_Async
 *
 *  PLASMA_sgelqf_Tile_Async - Computes the tile LQ factorization of a matrix.
 *  Non-blocking equivalent of PLASMA_sgelqf_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations ar runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa PLASMA_sgelqf
 * @sa PLASMA_sgelqf_Tile
 * @sa PLASMA_cgelqf_Tile_Async
 * @sa PLASMA_dgelqf_Tile_Async
 * @sa PLASMA_sgelqf_Tile_Async
 * @sa PLASMA_sgelqs_Tile_Async
 *
 ******************************************************************************/
int PLASMA_sgelqf_Tile_Async(PLASMA_desc *A, PLASMA_desc *T,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    PLASMA_desc descA = *A;
    PLASMA_desc descT = *T;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_sgelqf_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_sgelqf_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_sgelqf_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == PLASMA_SUCCESS)
        request->status = PLASMA_SUCCESS;
    else
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);

    /* Check descriptors for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_sgelqf_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_sgelqf_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (descA.nb != descA.mb) {
        plasma_error("PLASMA_sgelqf_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    /* Quick return */
/*
    if (min(M, N) == 0)
        return PLASMA_SUCCESS;
*/
    if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
        plasma_parallel_call_4(plasma_psgelqf,
            PLASMA_desc, descA,
            PLASMA_desc, descT,
            PLASMA_sequence*, sequence,
            PLASMA_request*, request);
    }
    else {
        plasma_dynamic_call_5(plasma_psgelqfrh,
            PLASMA_desc, descA,
            PLASMA_desc, descT,
            PLASMA_enum, PLASMA_RHBLK,
            PLASMA_sequence*, sequence,
            PLASMA_request*, request);
    }

    return PLASMA_SUCCESS;
}