示例#1
0
/*
 * Warm up the host<->device transfer path with small round-trip copies.
 *
 * Allocates a zero-filled host buffer and a device buffer (device id 0),
 * each sized for OOC_NTHREADS tiles of NB x NB doubles.  For each warm-up
 * slot r it inserts one CORE_H2D task followed by one CORE_D2H task on the
 * r-th tile, both locked to thread r through TASK_LOCK_TO_THREAD.  It then
 * waits on QUARK_Barrier and releases both buffers.
 *
 * q: QUARK instance the tasks are inserted into.
 *
 * Returns without inserting any task when an allocation fails.
 */
void warmup(Quark *q){
    int NB = 200;
    /* Two slots are warmed up, but never more tiles than were allocated. */
    const int nwarm = (OOC_NTHREADS < 2) ? OOC_NTHREADS : 2;

    /* calloc: the host->device copies must not read uninitialized memory. */
    double *H = (double*) calloc(NB*NB*OOC_NTHREADS, sizeof(double));
    if (!H) {
        return;
    }
    double *D = (double*) offload_Alloc(NB*NB*OOC_NTHREADS*sizeof(double), 0);
    /* NOTE(review): assumes offload_Alloc reports failure with a null
     * pointer -- confirm against its implementation. */
    if (!D) {
        free(H);
        return;
    }

    {
        Quark_Task_Flags tflags = Quark_Task_Flags_Initializer;
//      for(int r = 0; r < OOC_NTHREADS; r++){
        for(int r = 0; r < nwarm; r++){
            QUARK_Task_Flag_Set(&tflags, TASK_LOCK_TO_THREAD, r);
//          QUARK_Task_Flag_Set(&tflags, THREAD_SET_TO_MANUAL_SCHEDULING, (r==0)||(r==1));
            /* Host tile r -> device tile r. */
            QUARK_Insert_Task(q, CORE_H2D, &tflags,
                sizeof(int),                &NB,        VALUE,
                sizeof(int),                &NB,        VALUE,
                sizeof(double),             H+r*NB*NB,  INPUT,
                sizeof(int),                &NB,        VALUE,
                sizeof(double),             D+r*NB*NB,  OUTPUT,
                sizeof(int),                &NB,        VALUE,
                0);
            /* Device tile r -> host tile r. */
            QUARK_Insert_Task(q, CORE_D2H, &tflags,
                sizeof(int),                &NB,        VALUE,
                sizeof(int),                &NB,        VALUE,
                sizeof(double),             D+r*NB*NB,  INPUT,
                sizeof(int),                &NB,        VALUE,
                sizeof(double),             H+r*NB*NB,  OUTPUT,
                sizeof(int),                &NB,        VALUE,
                0);
        }
    }
    /* All transfers must have completed before the buffers are released. */
    QUARK_Barrier(q);
    offload_Free(D, 0);
    free(H);
}
示例#2
0
/***************************************************************************//**
 *
 * Inserts a CORE_cgemm_p2f1_quark task into the QUARK runtime.
 *
 * transA, transB, m, n, k, alpha, lda, ldb, beta and ldc are passed as VALUE
 * arguments.  nb is not forwarded to the task; it is only used here to
 * compute the declared sizes of A and C.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Tagged INPUT; declared size is lda*nb elements.
 * @param[in] B           Pointer to a tile pointer, tagged INPUT; the declared
 *                        size is that of a single pointer.
 * @param[in,out] C       Tagged INOUT; declared size is ldc*nb elements.
 * @param[in] fake1       Additional argument of szefake1 elements whose
 *                        QUARK flag is supplied by the caller in flag1.
 * @param[in] szefake1    Number of elements declared for fake1.
 * @param[in] flag1       QUARK argument flag applied to fake1.
 *
 **/
void QUARK_CORE_cgemm_p2f1(Quark *quark, Quark_Task_Flags *task_flags,
                           int transA, int transB,
                           int m, int n, int k, int nb,
                           PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int lda,
                           PLASMA_Complex32_t **B, int ldb,
                           PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int ldc,
                           PLASMA_Complex32_t *fake1, int szefake1, int flag1)
{
    DAG_CORE_GEMM;
    QUARK_Insert_Task(quark, CORE_cgemm_p2f1_quark, task_flags,
        sizeof(PLASMA_enum),                &transA,    VALUE,
        sizeof(PLASMA_enum),                &transB,    VALUE,
        sizeof(int),                        &m,         VALUE,
        sizeof(int),                        &n,         VALUE,
        sizeof(int),                        &k,         VALUE,
        sizeof(PLASMA_Complex32_t),         &alpha,     VALUE,
        sizeof(PLASMA_Complex32_t)*lda*nb,   A,                 INPUT,
        sizeof(int),                        &lda,       VALUE,
        sizeof(PLASMA_Complex32_t*),         B,                 INPUT,
        sizeof(int),                        &ldb,       VALUE,
        sizeof(PLASMA_Complex32_t),         &beta,      VALUE,
        sizeof(PLASMA_Complex32_t)*ldc*nb,    C,                 INOUT,
        sizeof(int),                        &ldc,       VALUE,
        sizeof(PLASMA_Complex32_t)*szefake1, fake1,             flag1,
        0);
}
示例#3
0
/***************************************************************************//**
 *
 * Inserts a CORE_sormqr_quark task into the QUARK runtime.
 *
 * side, trans, m, n, k, ib, lda, ldt and ldc are passed as VALUE arguments.
 * nb sizes the tile arguments and is also passed as the last VALUE, right
 * after the SCRATCH buffer (presumably the workspace leading dimension --
 * confirm against CORE_sormqr_quark).
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Tagged INPUT | QUARK_REGION_L; declared nb*nb floats.
 * @param[in] T           Tagged INPUT; declared ib*nb floats.
 * @param[in,out] C       Tagged INOUT; declared nb*nb floats.
 *
 * A SCRATCH argument of ib*nb floats is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_sormqr(Quark *quark, Quark_Task_Flags *task_flags,
                       int side, int trans,
                       int m, int n, int k, int ib, int nb,
                       float *A, int lda,
                       float *T, int ldt,
                       float *C, int ldc)
{
    DAG_CORE_UNMQR;
    QUARK_Insert_Task(quark, CORE_sormqr_quark, task_flags,
        sizeof(PLASMA_enum),                &side,  VALUE,
        sizeof(PLASMA_enum),                &trans, VALUE,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(float)*nb*nb,    A,             INPUT | QUARK_REGION_L,
        sizeof(int),                        &lda,   VALUE,
        sizeof(float)*ib*nb,    T,             INPUT,
        sizeof(int),                        &ldt,   VALUE,
        sizeof(float)*nb*nb,    C,             INOUT,
        sizeof(int),                        &ldc,   VALUE,
        sizeof(float)*ib*nb,    NULL,          SCRATCH,
        sizeof(int),                        &nb,    VALUE,
        0);
}
示例#4
0
/***************************************************************************//**
 *
 * Inserts a CORE_zssssm_quark task into the QUARK runtime.
 *
 * m1, n1, m2, n2, k, ib, lda1, lda2, ldl1 and ldl2 are passed as VALUE
 * arguments.  nb is not forwarded; it only sizes the pointer arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A1      Tagged INOUT; declared nb*nb elements.
 * @param[in,out] A2      Tagged INOUT | LOCALITY; declared nb*nb elements.
 * @param[in] L1          Tagged INPUT; declared ib*nb elements.
 * @param[in] L2          Tagged INPUT; declared ib*nb elements.
 * @param[in] IPIV        Tagged INPUT; declared nb ints.
 *
 **/
void QUARK_CORE_zssssm(Quark *quark, Quark_Task_Flags *task_flags,
                       int m1, int n1, int m2, int n2, int k, int ib, int nb,
                       PLASMA_Complex64_t *A1, int lda1,
                       PLASMA_Complex64_t *A2, int lda2,
                       const PLASMA_Complex64_t *L1, int ldl1,
                       const PLASMA_Complex64_t *L2, int ldl2,
                       const int *IPIV)
{
    DAG_CORE_SSSSM;
    QUARK_Insert_Task(quark, CORE_zssssm_quark, task_flags,
        sizeof(int),                        &m1,    VALUE,
        sizeof(int),                        &n1,    VALUE,
        sizeof(int),                        &m2,    VALUE,
        sizeof(int),                        &n2,    VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(PLASMA_Complex64_t)*nb*nb,    A1,            INOUT,
        sizeof(int),                        &lda1,  VALUE,
        sizeof(PLASMA_Complex64_t)*nb*nb,    A2,            INOUT | LOCALITY,
        sizeof(int),                        &lda2,  VALUE,
        sizeof(PLASMA_Complex64_t)*ib*nb,    L1,            INPUT,
        sizeof(int),                        &ldl1,  VALUE,
        sizeof(PLASMA_Complex64_t)*ib*nb,    L2,            INPUT,
        sizeof(int),                        &ldl2,  VALUE,
        sizeof(int)*nb,                      IPIV,          INPUT,
        0);
}
/***************************************************************************//**
 *
 * Version of zgemm for tile storage, to avoid dependency problems when
 * computations are done within the tile. alpha and beta are passed as
 * pointers so they can depend on runtime values; both are tagged INPUT.
 *
 * A, B and C themselves are tagged NODEP; the dependencies are declared on
 * Alock, Block and Clock instead.  transA, transB, m, n, k, lda, ldb and ldc
 * are passed as VALUE arguments.
 *
 * @param[in] Alock
 *          Pointer to tile owning submatrix A (tagged INPUT).
 *
 * @param[in] Block
 *          Pointer to tile owning submatrix B (tagged INPUT).
 *
 * @param[in,out] Clock
 *          Pointer to tile owning submatrix C (tagged INOUT).
 *
 * NOTE(review): Alock is declared with nb*nb elements while Block and Clock
 * are declared with nb elements -- confirm which size is intended.
 *
 **/
void QUARK_CORE_zgemm_tile(Quark *quark, Quark_Task_Flags *task_flags,
                           PLASMA_enum transA, PLASMA_enum transB,
                           int m, int n, int k, int nb,
                           const PLASMA_Complex64_t *alpha, const PLASMA_Complex64_t *A, int lda,
                                                            const PLASMA_Complex64_t *B, int ldb,
                           const PLASMA_Complex64_t *beta,        PLASMA_Complex64_t *C, int ldc,
                           const PLASMA_Complex64_t *Alock,
                           const PLASMA_Complex64_t *Block,
                           const PLASMA_Complex64_t *Clock)
{
    DAG_CORE_GEMM;
    QUARK_Insert_Task(quark, CORE_zgemm_tile_quark, task_flags,
        sizeof(PLASMA_enum),              &transA, VALUE,
        sizeof(PLASMA_enum),              &transB, VALUE,
        sizeof(int),                      &m,      VALUE,
        sizeof(int),                      &n,      VALUE,
        sizeof(int),                      &k,      VALUE,
        sizeof(PLASMA_Complex64_t),       alpha,           INPUT,
        sizeof(PLASMA_Complex64_t)*nb*nb, A,               NODEP,          /* input; see Alock */
        sizeof(int),                      &lda,    VALUE,
        sizeof(PLASMA_Complex64_t)*nb*nb, B,               NODEP,          /* input; see Block */
        sizeof(int),                      &ldb,    VALUE,
        sizeof(PLASMA_Complex64_t),       beta,            INPUT,
        sizeof(PLASMA_Complex64_t)*nb*nb, C,                       NODEP,  /* inout; see Clock */
        sizeof(int),                      &ldc,    VALUE,
        sizeof(PLASMA_Complex64_t)*nb*nb, Alock,           INPUT,
        sizeof(PLASMA_Complex64_t)*nb,    Block,           INPUT,
        sizeof(PLASMA_Complex64_t)*nb,    Clock,                   INOUT,
        0);
}
示例#6
0
/***************************************************************************//**
 *
 * Queues a CORE_cttmlq_quark task on the QUARK runtime.
 *
 * side, trans, m1, n1, m2, n2, k, ib and the leading dimensions are passed
 * as VALUE arguments.  The final VALUE is the workspace leading dimension:
 * ib when side is PlasmaLeft, nb otherwise.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A1      Tagged INOUT; declared nb*nb elements.
 * @param[in,out] A2      Tagged INOUT; declared nb*nb elements.
 * @param[in] V           Tagged INPUT | QUARK_REGION_D | QUARK_REGION_L;
 *                        declared nb*nb elements.
 * @param[in] T           Tagged INPUT; declared ib*nb elements.
 *
 * A SCRATCH argument of ib*nb elements is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_cttmlq(Quark *quark, Quark_Task_Flags *task_flags,
                       int side, int trans,
                       int m1, int n1, int m2, int n2, int k, int ib, int nb,
                       PLASMA_Complex32_t *A1, int lda1,
                       PLASMA_Complex32_t *A2, int lda2,
                       PLASMA_Complex32_t *V, int ldv,
                       PLASMA_Complex32_t *T, int ldt)
{
    /* Workspace leading dimension depends on which side is applied. */
    int ldwork;
    if (side == PlasmaLeft) {
        ldwork = ib;
    } else {
        ldwork = nb;
    }

    DAG_CORE_TTMLQ;
    QUARK_Insert_Task(
        quark, CORE_cttmlq_quark, task_flags,
        sizeof(PLASMA_enum),              &side,   VALUE,
        sizeof(PLASMA_enum),              &trans,  VALUE,
        sizeof(int),                      &m1,     VALUE,
        sizeof(int),                      &n1,     VALUE,
        sizeof(int),                      &m2,     VALUE,
        sizeof(int),                      &n2,     VALUE,
        sizeof(int),                      &k,      VALUE,
        sizeof(int),                      &ib,     VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb, A1,      INOUT,
        sizeof(int),                      &lda1,   VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb, A2,      INOUT,
        sizeof(int),                      &lda2,   VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb, V,       INPUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int),                      &ldv,    VALUE,
        sizeof(PLASMA_Complex32_t)*ib*nb, T,       INPUT,
        sizeof(int),                      &ldt,    VALUE,
        sizeof(PLASMA_Complex32_t)*ib*nb, NULL,    SCRATCH,
        sizeof(int),                      &ldwork, VALUE,
        0);
}
示例#7
0
/***************************************************************************//**
 *
 * Inserts a CORE_zbrdalg_quark task into the QUARK runtime.
 *
 * uplo, N, NB, i, j, m and grsiz are passed as VALUE arguments.
 * NOTE(review): BAND is accepted but never forwarded to the task -- confirm
 * this is intentional.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Descriptor pointer, tagged NODEP.
 * @param[in] V           Tagged NODEP; declared as a single element.
 * @param[in] TAU         Tagged NODEP; declared as a single element.
 * @param[in] PCOL        Tagged INPUT; declared as a single int.
 * @param[in] ACOL        Tagged INPUT; declared as a single int.
 * @param[out] MCOL       Tagged OUTPUT | LOCALITY; declared as a single int.
 *
 **/
void QUARK_CORE_zbrdalg(Quark *quark, Quark_Task_Flags *task_flags,
                        int uplo,
                        int N, int NB,
                        PLASMA_desc *A,
                        PLASMA_Complex64_t *V,
                        PLASMA_Complex64_t *TAU,
                        int i, int j, int m, int grsiz, int BAND,
                        int *PCOL, int *ACOL, int *MCOL)
{
    QUARK_Insert_Task(quark, CORE_zbrdalg_quark,   task_flags,
        sizeof(int),               &uplo,               VALUE,
        sizeof(int),                  &N,               VALUE,
        sizeof(int),                 &NB,               VALUE,
        sizeof(PLASMA_desc),           A,               NODEP,
        sizeof(PLASMA_Complex64_t),    V,               NODEP,
        sizeof(PLASMA_Complex64_t),    TAU,               NODEP,
        sizeof(int),                  &i,               VALUE,
        sizeof(int),                  &j,               VALUE,
        sizeof(int),                  &m,               VALUE,
        sizeof(int),              &grsiz,               VALUE,
        sizeof(int),                PCOL,               INPUT,
        sizeof(int),                ACOL,               INPUT,
        sizeof(int),                MCOL,              OUTPUT | LOCALITY,
        0);

}
示例#8
0
/***************************************************************************//**
 *
 * Inserts a CORE_dtstrf_quark task into the QUARK runtime.
 *
 * m, n, ib, nb, ldu, lda, ldl, check_info and iinfo are passed as VALUE
 * arguments; the sequence and request pointers are passed by value as well.
 * nb is passed a second time right after the SCRATCH buffer (presumably the
 * workspace leading dimension -- confirm against CORE_dtstrf_quark).
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] U       Tagged INOUT | QUARK_REGION_D | QUARK_REGION_U;
 *                        declared nb*nb doubles.
 * @param[in,out] A       Tagged INOUT | LOCALITY; declared nb*nb doubles.
 * @param[out] L          Tagged OUTPUT; declared ib*nb doubles.
 * @param[out] IPIV       Tagged OUTPUT; declared nb ints.
 * @param[in] sequence    Pointer value forwarded to the task.
 * @param[in] request     Pointer value forwarded to the task.
 *
 * A SCRATCH argument of ib*nb doubles is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_dtstrf(Quark *quark, Quark_Task_Flags *task_flags,
                       int m, int n, int ib, int nb,
                       double *U, int ldu,
                       double *A, int lda,
                       double *L, int ldl,
                       int *IPIV,
                       PLASMA_sequence *sequence, PLASMA_request *request,
                       PLASMA_bool check_info, int iinfo)
{
    DAG_CORE_TSTRF;
    QUARK_Insert_Task(quark, CORE_dtstrf_quark, task_flags,
        sizeof(int),                        &m,             VALUE,
        sizeof(int),                        &n,             VALUE,
        sizeof(int),                        &ib,            VALUE,
        sizeof(int),                        &nb,            VALUE,
        sizeof(double)*nb*nb,    U,                     INOUT | QUARK_REGION_D | QUARK_REGION_U,
        sizeof(int),                        &ldu,           VALUE,
        sizeof(double)*nb*nb,    A,                     INOUT | LOCALITY,
        sizeof(int),                        &lda,           VALUE,
        sizeof(double)*ib*nb,    L,                     OUTPUT,
        sizeof(int),                        &ldl,           VALUE,
        sizeof(int)*nb,                      IPIV,                  OUTPUT,
        sizeof(double)*ib*nb,    NULL,                  SCRATCH,
        sizeof(int),                        &nb,            VALUE,
        sizeof(PLASMA_sequence*),           &sequence,      VALUE,
        sizeof(PLASMA_request*),            &request,       VALUE,
        sizeof(PLASMA_bool),                &check_info,    VALUE,
        sizeof(int),                        &iinfo,         VALUE,
        0);
}
/***************************************************************************//**
 *
 * Inserts a CORE_ctsmqr_corner_quark task into the QUARK runtime.
 *
 * m1, n1, m2, n2, m3, n3, k, ib, nb and the leading dimensions are passed as
 * VALUE arguments.  The final VALUE is ldwork, which is set to nb.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A1      Tagged INOUT | QUARK_REGION_D | QUARK_REGION_L;
 *                        declared nb*nb elements.
 * @param[in,out] A2      Tagged INOUT; declared nb*nb elements.
 * @param[in,out] A3      Tagged INOUT | QUARK_REGION_D | QUARK_REGION_L;
 *                        declared nb*nb elements.
 * @param[in] V           Tagged INPUT; declared nb*nb elements.
 * @param[in] T           Tagged INPUT; declared ib*nb elements.
 *
 * A SCRATCH argument of 4*nb*nb elements is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_ctsmqr_corner(Quark *quark, Quark_Task_Flags *task_flags,
                         int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb,
                         PLASMA_Complex32_t *A1, int lda1,
                         PLASMA_Complex32_t *A2, int lda2,
                         PLASMA_Complex32_t *A3, int lda3,
                         PLASMA_Complex32_t *V, int ldv,
                         PLASMA_Complex32_t *T, int ldt)
{
    int ldwork = nb;

    QUARK_Insert_Task(quark, CORE_ctsmqr_corner_quark, task_flags,
        sizeof(int),                        &m1,    VALUE,
        sizeof(int),                        &n1,    VALUE,
        sizeof(int),                        &m2,    VALUE,
        sizeof(int),                        &n2,    VALUE,
        sizeof(int),                        &m3,    VALUE,
        sizeof(int),                        &n3,    VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(int),                        &nb,    VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    A1,            INOUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int),                        &lda1,  VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    A2,            INOUT,
        sizeof(int),                        &lda2,  VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    A3,            INOUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int),                        &lda3,  VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    V,             INPUT,
        sizeof(int),                        &ldv,   VALUE,
        sizeof(PLASMA_Complex32_t)*ib*nb,    T,             INPUT,
        sizeof(int),                        &ldt,   VALUE,
        sizeof(PLASMA_Complex32_t)*4*nb*nb,    NULL,          SCRATCH,
        sizeof(int),                        &ldwork, VALUE,
        0);
}
示例#10
0
/***************************************************************************//**
 *
 * Inserts a CORE_dtsmlq_corner_quark task into the QUARK runtime.
 *
 * m1, n1, m2, n2, m3, n3, k, ib, nb and the leading dimensions are passed as
 * VALUE arguments.  The final VALUE is ldwork, which is set to nb.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A1      Tagged INOUT | QUARK_REGION_D | QUARK_REGION_U;
 *                        declared nb*nb doubles.
 * @param[in,out] A2      Tagged INOUT; declared nb*nb doubles.
 * @param[in,out] A3      Tagged INOUT | QUARK_REGION_D | QUARK_REGION_U;
 *                        declared nb*nb doubles.
 * @param[in] V           Tagged INPUT; declared nb*nb doubles.
 * @param[in] T           Tagged INPUT; declared ib*nb doubles.
 *
 * A SCRATCH argument of 4*nb*nb doubles is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_dtsmlq_corner(Quark *quark, Quark_Task_Flags *task_flags,
                         int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb,
                         double *A1, int lda1,
                         double *A2, int lda2,
                         double *A3, int lda3,
                         double *V, int ldv,
                         double *T, int ldt)
{
    int ldwork = nb;

    QUARK_Insert_Task(quark, CORE_dtsmlq_corner_quark, task_flags,
        sizeof(int),                        &m1,    VALUE,
        sizeof(int),                        &n1,    VALUE,
        sizeof(int),                        &m2,    VALUE,
        sizeof(int),                        &n2,    VALUE,
        sizeof(int),                        &m3,    VALUE,
        sizeof(int),                        &n3,    VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(int),                        &nb,    VALUE,
        sizeof(double)*nb*nb,    A1,            INOUT|QUARK_REGION_D|QUARK_REGION_U,
        sizeof(int),                        &lda1,  VALUE,
        sizeof(double)*nb*nb,    A2,            INOUT,
        sizeof(int),                        &lda2,  VALUE,
        sizeof(double)*nb*nb,    A3,            INOUT|QUARK_REGION_D|QUARK_REGION_U,
        sizeof(int),                        &lda3,  VALUE,
        sizeof(double)*nb*nb,    V,             INPUT,
        sizeof(int),                        &ldv,   VALUE,
        sizeof(double)*ib*nb,    T,             INPUT,
        sizeof(int),                        &ldt,   VALUE,
        sizeof(double)*4*nb*nb,    NULL,          SCRATCH,
        sizeof(int),                        &ldwork, VALUE,
        0);
}
示例#11
0
/***************************************************************************//**
 *
 * Queues a CORE_dtsmqr_sytra1_quark task on the QUARK runtime.
 *
 * side, trans, m1, n1, m2, n2, k, ib and the leading dimensions are passed
 * as VALUE arguments.  The final VALUE is the workspace leading dimension:
 * ib when side is PlasmaLeft, nb otherwise.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A1      Tagged INOUT | QUARK_REGION_L | QUARK_REGION_D;
 *                        declared nb*nb doubles.
 * @param[in,out] A2      Tagged INOUT; declared nb*nb doubles.
 * @param[in] V           Tagged INPUT; declared nb*nb doubles.
 * @param[in] T           Tagged INPUT; declared ib*nb doubles.
 *
 * A SCRATCH argument of ib*nb doubles is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_dtsmqr_sytra1(Quark *quark, Quark_Task_Flags *task_flags,
                        int side, int trans,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
                        double *A1, int lda1,
                        double *A2, int lda2,
                        double *V, int ldv,
                        double *T, int ldt)
{
    /* Workspace leading dimension depends on which side is applied. */
    int ldwork;
    if (side == PlasmaLeft) {
        ldwork = ib;
    } else {
        ldwork = nb;
    }

    QUARK_Insert_Task(
        quark, CORE_dtsmqr_sytra1_quark, task_flags,
        sizeof(PLASMA_enum),  &side,   VALUE,
        sizeof(PLASMA_enum),  &trans,  VALUE,
        sizeof(int),          &m1,     VALUE,
        sizeof(int),          &n1,     VALUE,
        sizeof(int),          &m2,     VALUE,
        sizeof(int),          &n2,     VALUE,
        sizeof(int),          &k,      VALUE,
        sizeof(int),          &ib,     VALUE,
        sizeof(double)*nb*nb, A1,      INOUT|QUARK_REGION_L|QUARK_REGION_D,
        sizeof(int),          &lda1,   VALUE,
        sizeof(double)*nb*nb, A2,      INOUT,
        sizeof(int),          &lda2,   VALUE,
        sizeof(double)*nb*nb, V,       INPUT,
        sizeof(int),          &ldv,    VALUE,
        sizeof(double)*ib*nb, T,       INPUT,
        sizeof(int),          &ldt,    VALUE,
        sizeof(double)*ib*nb, NULL,    SCRATCH,
        sizeof(int),          &ldwork, VALUE,
        0);
}
示例#12
0
/* Create a call that will insert a matmul task into the QUARK
 * runtime.  Later, when dependencies are satisfied, the runtime will
 * execute this task.  The arguments to matmul are specified and
 * passed to QUARK here.
 *
 * A and B are tagged INPUT and C is tagged INOUT, each declared as
 * NB*NB doubles; NB itself is passed as a VALUE argument.  No task
 * flags are supplied (NULL). */
void matmul_quark_call( Quark *quark, double *A, double *B, double *C, int NB )
{
    QUARK_Insert_Task( quark, matmul_quark_task, NULL,
                       sizeof(double)*NB*NB, A, INPUT,
                       sizeof(double)*NB*NB, B, INPUT,
                       sizeof(double)*NB*NB, C, INOUT,
                       sizeof(int), &NB, VALUE,
                       0 );
}
示例#13
0
/***************************************************************************//**
 *
 * Inserts a CORE_dgetrip_quark task into the QUARK runtime.
 *
 * m and n are passed as VALUE arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A       Tagged INOUT; declared szeA doubles.
 * @param[in] szeA        Element count used for both A and the workspace.
 *
 * A SCRATCH argument of szeA doubles is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_dgetrip(Quark *quark, Quark_Task_Flags *task_flags,
                        int m, int n, double *A, int szeA)
{
    DAG_CORE_GETRIP;
    QUARK_Insert_Task(quark, CORE_dgetrip_quark, task_flags,
        sizeof(int),                     &m,   VALUE,
        sizeof(int),                     &n,   VALUE,
        sizeof(double)*szeA, A,        INOUT,
        sizeof(double)*szeA, NULL,     SCRATCH,
        0);
}
示例#14
0
/***************************************************************************//**
 *
 * Inserts a CORE_zgetrip_quark task into the QUARK runtime.
 *
 * m and n are passed as VALUE arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A       Tagged INOUT; declared szeA elements.
 * @param[in] szeA        Element count used for both A and the workspace.
 *
 * A SCRATCH argument of szeA elements is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_zgetrip(Quark *quark, Quark_Task_Flags *task_flags,
                        int m, int n, PLASMA_Complex64_t *A, int szeA)
{
    DAG_CORE_GETRIP;
    QUARK_Insert_Task(quark, CORE_zgetrip_quark, task_flags,
        sizeof(int),                     &m,   VALUE,
        sizeof(int),                     &n,   VALUE,
        sizeof(PLASMA_Complex64_t)*szeA, A,        INOUT,
        sizeof(PLASMA_Complex64_t)*szeA, NULL,     SCRATCH,
        0);
}
示例#15
0
/***************************************************************************//**
 *
 * Inserts a CORE_zlaset2_quark task into the QUARK runtime.
 *
 * uplo, M, N, alpha and LDA are passed as VALUE arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[out] A          Tagged OUTPUT; declared M*N elements (the declared
 *                        size uses M, not LDA).
 *
 **/
void QUARK_CORE_zlaset2(Quark *quark, Quark_Task_Flags *task_flags,
                       PLASMA_enum uplo, int M, int N,
                       PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA)
{
    QUARK_Insert_Task(quark, CORE_zlaset2_quark, task_flags,
        sizeof(PLASMA_enum),                &uplo,  VALUE,
        sizeof(int),                        &M,     VALUE,
        sizeof(int),                        &N,     VALUE,
        sizeof(PLASMA_Complex64_t),         &alpha, VALUE,
        sizeof(PLASMA_Complex64_t)*M*N,     A,      OUTPUT,
        sizeof(int),                        &LDA,   VALUE,
        0);
}
示例#16
0
/***************************************************************************//**
 *
 * Inserts a CORE_zswpab_quark task into the QUARK runtime.
 *
 * i, n1 and n2 are passed as VALUE arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A       Tagged INOUT; declared szeA elements.
 * @param[in] szeA        Element count declared for A.
 *
 * A SCRATCH argument of min(n1,n2) elements is requested with a NULL
 * pointer.
 *
 **/
void QUARK_CORE_zswpab(Quark *quark, Quark_Task_Flags *task_flags,
                       int i, int n1, int n2,
                       PLASMA_Complex64_t *A, int szeA)
{
    DAG_CORE_SWPAB;
    QUARK_Insert_Task(
        quark, CORE_zswpab_quark, task_flags,
        sizeof(int),                           &i,   VALUE,
        sizeof(int),                           &n1,  VALUE,
        sizeof(int),                           &n2,  VALUE,
        sizeof(PLASMA_Complex64_t)*szeA,       A,            INOUT,
        sizeof(PLASMA_Complex64_t)*min(n1,n2), NULL,         SCRATCH,
        0);
}
示例#17
0
/***************************************************************************//**
 *
 * Inserts a CORE_clag2z_quark task into the QUARK runtime.
 *
 * m, n, lda and ldb are passed as VALUE arguments.  nb is not forwarded; it
 * only sizes the tile arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Single-precision complex tile, tagged INPUT;
 *                        declared nb*nb elements.
 * @param[in,out] B       Double-precision complex tile, tagged INOUT;
 *                        declared nb*nb elements.
 *
 **/
void QUARK_CORE_clag2z(Quark *quark, Quark_Task_Flags *task_flags,
                       int m, int n, int nb,
                       const PLASMA_Complex32_t *A, int lda,
                       PLASMA_Complex64_t *B, int ldb)
{
    QUARK_Insert_Task(quark, CORE_clag2z_quark, task_flags,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    A,             INPUT,
        sizeof(int),                        &lda,   VALUE,
        sizeof(PLASMA_Complex64_t)*nb*nb,    B,             INOUT,
        sizeof(int),                        &ldb,   VALUE,
        0);
}
示例#18
0
/***************************************************************************//**
 *
 * Inserts a CORE_cgetrip_f1_quark task into the QUARK runtime.
 *
 * m and n are passed as VALUE arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A       Tagged INOUT; declared szeA elements.
 * @param[in] szeA        Element count used for both A and the workspace.
 * @param[in] fake        Additional argument of szeF elements whose QUARK
 *                        flag is supplied by the caller in paramF.
 * @param[in] szeF        Number of elements declared for fake.
 * @param[in] paramF      QUARK argument flag applied to fake.
 *
 * A SCRATCH argument of szeA elements is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_cgetrip_f1(Quark *quark, Quark_Task_Flags *task_flags,
                           int m, int n, 
                           PLASMA_Complex32_t *A,    int szeA,
                           PLASMA_Complex32_t *fake, int szeF, int paramF)
{
    DAG_CORE_GETRIP;
    QUARK_Insert_Task(
        quark, CORE_cgetrip_f1_quark, task_flags,
        sizeof(int),                     &m,   VALUE,
        sizeof(int),                     &n,   VALUE,
        sizeof(PLASMA_Complex32_t)*szeA, A,        INOUT,
        sizeof(PLASMA_Complex32_t)*szeA, NULL,     SCRATCH,
        sizeof(PLASMA_Complex32_t)*szeF, fake,     paramF,
        0);
}
示例#19
0
/***************************************************************************//**
 *
 * Inserts a CORE_zplrnt_quark task into the QUARK runtime.
 *
 * m, n, lda, bigM, m0, n0 and seed are passed as VALUE arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[out] A          Tagged OUTPUT; declared lda*n elements.
 * @param[in] seed        64-bit unsigned seed value forwarded to the task.
 *
 **/
void QUARK_CORE_zplrnt( Quark *quark, Quark_Task_Flags *task_flags,
                        int m, int n, PLASMA_Complex64_t *A, int lda,
                        int bigM, int m0, int n0, unsigned long long int seed )
{
    DAG_CORE_PLRNT;
    QUARK_Insert_Task(quark, CORE_zplrnt_quark, task_flags,
        sizeof(int),                      &m,    VALUE,
        sizeof(int),                      &n,    VALUE,
        sizeof(PLASMA_Complex64_t)*lda*n, A,         OUTPUT,
        sizeof(int),                      &lda,  VALUE,
        sizeof(int),                      &bigM, VALUE,
        sizeof(int),                      &m0,   VALUE,
        sizeof(int),                      &n0,   VALUE,
        sizeof(unsigned long long int),   &seed, VALUE,
        0);
}
示例#20
0
/***************************************************************************//**
 *
 * Inserts a CORE_clacpy_quark task into the QUARK runtime.
 *
 * uplo, m, n, lda and ldb are passed as VALUE arguments.  nb is not
 * forwarded; it only sizes the tile arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Tagged INPUT; declared nb*nb elements.
 * @param[out] B          Tagged OUTPUT; declared nb*nb elements.
 *
 **/
void QUARK_CORE_clacpy(Quark *quark, Quark_Task_Flags *task_flags,
                       PLASMA_enum uplo, int m, int n, int nb,
                       PLASMA_Complex32_t *A, int lda,
                       PLASMA_Complex32_t *B, int ldb)
{
    DAG_CORE_LACPY;
    QUARK_Insert_Task(quark, CORE_clacpy_quark, task_flags,
        sizeof(PLASMA_enum),                &uplo,  VALUE,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    A,             INPUT,
        sizeof(int),                        &lda,   VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    B,             OUTPUT,
        sizeof(int),                        &ldb,   VALUE,
        0);
}
示例#21
0
/***************************************************************************//**
 *
 * Inserts a CORE_caxpy_quark task into the QUARK runtime.
 *
 * m, n, alpha, lda and ldb are passed as VALUE arguments.  nb is not
 * forwarded; it only sizes the tile arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Tagged INPUT; declared nb*nb elements.
 * @param[in,out] B       Tagged INOUT; declared nb*nb elements.
 *
 **/
void QUARK_CORE_caxpy(Quark *quark, Quark_Task_Flags *task_flags,
                      int m, int n, int nb, PLASMA_Complex32_t alpha,
                      PLASMA_Complex32_t *A, int lda,
                      PLASMA_Complex32_t *B, int ldb)
{
    DAG_CORE_AXPY;
    QUARK_Insert_Task(quark, CORE_caxpy_quark, task_flags,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(PLASMA_Complex32_t),         &alpha, VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    A,             INPUT,
        sizeof(int),                        &lda,   VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    B,             INOUT,
        sizeof(int),                        &ldb,   VALUE,
        0);
}
示例#22
0
/***************************************************************************//**
 *
 * Queues a CORE_zlanhe_quark task on the QUARK runtime.
 *
 * norm, uplo, N and LDA are passed as VALUE arguments.  The workspace size
 * szeW is raised to 1 when the caller passes a smaller value.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Tagged INPUT; declared szeA elements.
 * @param[in] szeW        Requested workspace size in doubles.
 * @param[out] result     Tagged OUTPUT; declared as a single double.
 *
 * A SCRATCH argument of szeW doubles is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_zlanhe(Quark *quark, Quark_Task_Flags *task_flags,
                       int norm, int uplo, int N,
                       PLASMA_Complex64_t *A, int LDA, int szeA,
                       int szeW, double *result)
{
    /* Always request at least one workspace element. */
    if (szeW < 1) {
        szeW = 1;
    }

    DAG_CORE_LANHE;
    QUARK_Insert_Task(
        quark, CORE_zlanhe_quark, task_flags,
        sizeof(PLASMA_enum),             &norm,  VALUE,
        sizeof(PLASMA_enum),             &uplo,  VALUE,
        sizeof(int),                     &N,     VALUE,
        sizeof(PLASMA_Complex64_t)*szeA, A,      INPUT,
        sizeof(int),                     &LDA,   VALUE,
        sizeof(double)*szeW,             NULL,   SCRATCH,
        sizeof(double),                  result, OUTPUT,
        0);
}
示例#23
0
/***************************************************************************//**
 *
 * Queues a CORE_slansy_quark task on the QUARK runtime.
 *
 * norm, uplo, N and LDA are passed as VALUE arguments.  The workspace size
 * szeW is raised to 1 when the caller passes a smaller value.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Tagged INPUT; declared szeA floats.
 * @param[in] szeW        Requested workspace size in floats.
 * @param[out] result     Tagged OUTPUT; declared as a single float.
 *
 * A SCRATCH argument of szeW floats is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_slansy(Quark *quark, Quark_Task_Flags *task_flags,
                       int norm, int uplo, int N,
                       float *A, int LDA, int szeA,
                       int szeW, float *result)
{
    /* Always request at least one workspace element. */
    if (szeW < 1) {
        szeW = 1;
    }

    DAG_CORE_LANSY;
    QUARK_Insert_Task(
        quark, CORE_slansy_quark, task_flags,
        sizeof(PLASMA_enum), &norm,  VALUE,
        sizeof(PLASMA_enum), &uplo,  VALUE,
        sizeof(int),         &N,     VALUE,
        sizeof(float)*szeA,  A,      INPUT,
        sizeof(int),         &LDA,   VALUE,
        sizeof(float)*szeW,  NULL,   SCRATCH,
        sizeof(float),       result, OUTPUT,
        0);
}
示例#24
0
/***************************************************************************//**
 *
 * Inserts a CORE_zgetrip_f2_quark task into the QUARK runtime.
 *
 * m and n are passed as VALUE arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A       Tagged INOUT; declared szeA elements.
 * @param[in] szeA        Element count used for both A and the workspace.
 * @param[in] fake1       Additional argument of szeF1 elements whose QUARK
 *                        flag is supplied by the caller in paramF1.
 * @param[in] fake2       Additional argument of szeF2 elements whose QUARK
 *                        flag is supplied by the caller in paramF2.
 *
 * A SCRATCH argument of szeA elements is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_zgetrip_f2(Quark *quark, Quark_Task_Flags *task_flags,
                           int m, int n, 
                           PLASMA_Complex64_t *A,    int szeA,
                           PLASMA_Complex64_t *fake1, int szeF1, int paramF1,
                           PLASMA_Complex64_t *fake2, int szeF2, int paramF2)
{
    DAG_CORE_GETRIP;
    QUARK_Insert_Task(
        quark, CORE_zgetrip_f2_quark, task_flags,
        sizeof(int),                     &m,   VALUE,
        sizeof(int),                     &n,   VALUE,
        sizeof(PLASMA_Complex64_t)*szeA, A,        INOUT,
        sizeof(PLASMA_Complex64_t)*szeA, NULL,     SCRATCH,
        sizeof(PLASMA_Complex64_t)*szeF1, fake1,     paramF1,
        sizeof(PLASMA_Complex64_t)*szeF2, fake2,     paramF2,
        0);
}
示例#25
0
/***************************************************************************//**
 *
 * Inserts a CORE_dgetrip_f2_quark task into the QUARK runtime.
 *
 * m and n are passed as VALUE arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A       Tagged INOUT; declared szeA doubles.
 * @param[in] szeA        Element count used for both A and the workspace.
 * @param[in] fake1       Additional argument of szeF1 doubles whose QUARK
 *                        flag is supplied by the caller in paramF1.
 * @param[in] fake2       Additional argument of szeF2 doubles whose QUARK
 *                        flag is supplied by the caller in paramF2.
 *
 * A SCRATCH argument of szeA doubles is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_dgetrip_f2(Quark *quark, Quark_Task_Flags *task_flags,
                           int m, int n, 
                           double *A,    int szeA,
                           double *fake1, int szeF1, int paramF1,
                           double *fake2, int szeF2, int paramF2)
{
    DAG_CORE_GETRIP;
    QUARK_Insert_Task(
        quark, CORE_dgetrip_f2_quark, task_flags,
        sizeof(int),                     &m,   VALUE,
        sizeof(int),                     &n,   VALUE,
        sizeof(double)*szeA, A,        INOUT,
        sizeof(double)*szeA, NULL,     SCRATCH,
        sizeof(double)*szeF1, fake1,     paramF1,
        sizeof(double)*szeF2, fake2,     paramF2,
        0);
}
示例#26
0
/***************************************************************************//**
 *
 * Queues a CORE_clange_quark task on the QUARK runtime.
 *
 * norm, M, N and LDA are passed as VALUE arguments.  The workspace size
 * szeW is raised to 1 when the caller passes a smaller value.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Tagged INPUT; declared szeA elements.
 * @param[in] szeW        Requested workspace size in floats.
 * @param[out] result     Tagged OUTPUT; declared as a single float.
 *
 * A SCRATCH argument of szeW floats is requested with a NULL pointer.
 *
 **/
void QUARK_CORE_clange(Quark *quark, Quark_Task_Flags *task_flags,
                       int norm, int M, int N,
                       PLASMA_Complex32_t *A, int LDA, int szeA,
                       int szeW, float *result)
{
    /* Always request at least one workspace element. */
    if (szeW < 1) {
        szeW = 1;
    }

    DAG_CORE_LANGE;
    QUARK_Insert_Task(
        quark, CORE_clange_quark, task_flags,
        sizeof(PLASMA_enum),             &norm,  VALUE,
        sizeof(int),                     &M,     VALUE,
        sizeof(int),                     &N,     VALUE,
        sizeof(PLASMA_Complex32_t)*szeA, A,      INPUT,
        sizeof(int),                     &LDA,   VALUE,
        sizeof(float)*szeW,              NULL,   SCRATCH,
        sizeof(float),                   result, OUTPUT,
        0);
}
示例#27
0
/***************************************************************************//**
 *
 * Inserts a CORE_cgelqt_quark task into the QUARK runtime.
 *
 * m, n, ib, lda and ldt are passed as VALUE arguments.  nb is not forwarded;
 * it only sizes the pointer and workspace arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A       Tagged INOUT; declared nb*nb elements.
 * @param[out] T          Tagged OUTPUT; declared ib*nb elements.
 *
 * Two SCRATCH arguments are requested with NULL pointers: one of nb elements
 * and one of ib*nb elements, in that order.
 *
 **/
void QUARK_CORE_cgelqt(Quark *quark, Quark_Task_Flags *task_flags,
                       int m, int n, int ib, int nb,
                       PLASMA_Complex32_t *A, int lda,
                       PLASMA_Complex32_t *T, int ldt)
{
    DAG_CORE_GELQT;
    QUARK_Insert_Task(quark, CORE_cgelqt_quark, task_flags,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    A,             INOUT,
        sizeof(int),                        &lda,   VALUE,
        sizeof(PLASMA_Complex32_t)*ib*nb,    T,             OUTPUT,
        sizeof(int),                        &ldt,   VALUE,
        sizeof(PLASMA_Complex32_t)*nb,       NULL,          SCRATCH,
        sizeof(PLASMA_Complex32_t)*ib*nb,    NULL,          SCRATCH,
        0);
}
示例#28
0
/***************************************************************************//**
 *
 * Inserts a CORE_dgeqrt_quark task into the QUARK runtime.
 *
 * m, n, ib, lda and ldt are passed as VALUE arguments.  nb is not forwarded;
 * it only sizes the pointer and workspace arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in,out] A       Tagged INOUT; declared nb*nb doubles.
 * @param[out] T          Tagged OUTPUT; declared ib*nb doubles.
 *
 * Two SCRATCH arguments are requested with NULL pointers: one of nb doubles
 * and one of ib*nb doubles, in that order.
 *
 **/
void QUARK_CORE_dgeqrt(Quark *quark, Quark_Task_Flags *task_flags,
                       int m, int n, int ib, int nb,
                       double *A, int lda,
                       double *T, int ldt)
{
    DAG_CORE_GEQRT;
    QUARK_Insert_Task(quark, CORE_dgeqrt_quark, task_flags,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(double)*nb*nb,    A,             INOUT,
        sizeof(int),                        &lda,   VALUE,
        sizeof(double)*ib*nb,    T,             OUTPUT,
        sizeof(int),                        &ldt,   VALUE,
        sizeof(double)*nb,       NULL,          SCRATCH,
        sizeof(double)*ib*nb,    NULL,          SCRATCH,
        0);
}
示例#29
0
/***************************************************************************//**
 *
 * Inserts a CORE_zlag2c_quark task into the QUARK runtime.
 *
 * m, n, lda and ldb are passed as VALUE arguments, as are the sequence and
 * request pointers.  nb is not forwarded; it only sizes the tile arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] A           Double-precision complex tile, tagged INPUT;
 *                        declared nb*nb elements.
 * @param[out] B          Single-precision complex tile, tagged OUTPUT;
 *                        declared nb*nb elements.
 * @param[in] sequence    Pointer value forwarded to the task.
 * @param[in] request     Pointer value forwarded to the task.
 *
 **/
void QUARK_CORE_zlag2c(Quark *quark, Quark_Task_Flags *task_flags,
                       int m, int n, int nb,
                       const PLASMA_Complex64_t *A, int lda,
                       PLASMA_Complex32_t *B, int ldb,
                       PLASMA_sequence *sequence, PLASMA_request *request)
{
    DAG_CORE_LAG2C;
    QUARK_Insert_Task(quark, CORE_zlag2c_quark, task_flags,
        sizeof(int),                        &m,         VALUE,
        sizeof(int),                        &n,         VALUE,
        sizeof(PLASMA_Complex64_t)*nb*nb,    A,                 INPUT,
        sizeof(int),                        &lda,       VALUE,
        sizeof(PLASMA_Complex32_t)*nb*nb,    B,                 OUTPUT,
        sizeof(int),                        &ldb,       VALUE,
        sizeof(PLASMA_sequence*),           &sequence,  VALUE,
        sizeof(PLASMA_request*),            &request,   VALUE,
        0);
}
示例#30
0
/***************************************************************************//**
 *
 * Inserts a CORE_sgessm_quark task into the QUARK runtime.
 *
 * m, n, k, ib, ldl and lda are passed as VALUE arguments.  nb is not
 * forwarded; it only sizes the pointer arguments.
 *
 * @param[in] quark       QUARK instance receiving the task.
 * @param[in] task_flags  Task flags handed to QUARK_Insert_Task.
 * @param[in] IPIV        Tagged INPUT; declared nb ints.
 * @param[in] L           Tagged INPUT | QUARK_REGION_L; declared nb*nb floats.
 * @param[in,out] A       Tagged INOUT; declared nb*nb floats.
 *
 **/
void QUARK_CORE_sgessm(Quark *quark, Quark_Task_Flags *task_flags,
                       int m, int n, int k, int ib, int nb,
                       int *IPIV,
                       float *L, int ldl,
                       float *A, int lda)
{
    DAG_CORE_GESSM;
    QUARK_Insert_Task(quark, CORE_sgessm_quark, task_flags,
        sizeof(int),                        &m,     VALUE,
        sizeof(int),                        &n,     VALUE,
        sizeof(int),                        &k,     VALUE,
        sizeof(int),                        &ib,    VALUE,
        sizeof(int)*nb,                      IPIV,          INPUT,
        sizeof(float)*nb*nb,    L,             INPUT | QUARK_REGION_L,
        sizeof(int),                        &ldl,   VALUE,
        sizeof(float)*nb*nb,    A,             INOUT,
        sizeof(int),                        &lda,   VALUE,
        0);
}