/*
 * Time one QUARK_CORE_cgetrf_rectil call on an m-by-n single-precision
 * complex matrix and, when requested, compare its result with
 * LAPACKE_cgetrf_work.
 *
 * iparam  Integer settings; this function reads TIMING_NB, TIMING_IB,
 *         TIMING_N (used as m), TIMING_NRHS (used as n), TIMING_CHECK and
 *         TIMING_THRDNBR.
 * dparam  When iparam[TIMING_CHECK] is non-zero, receives TIMING_ANORM
 *         (max norm of the PLASMA factors), TIMING_XNORM (max norm of the
 *         LAPACK factors), TIMING_BNORM (set to 0) and TIMING_RES (max norm
 *         of their difference). Not written otherwise.
 * t_      Receives the elapsed time measured with cWtime() around the
 *         factorization task and the sequence wait.
 *
 * Returns 0 on success, -1 if a buffer could not be allocated. Every exit
 * path releases whatever was allocated and calls PLASMA_Finalize().
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    /* NOTE(review): plasma_dynamic_spawn() is called without arguments below;
     * it presumably relies on a local named `plasma` - keep this name. */
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    PLASMA_Complex32_t *A = NULL, *AT = NULL, *A2 = NULL;
    PLASMA_desc *descA = NULL;
    real_Double_t t;
    int *ipiv = NULL, *ipiv2 = NULL;
    float *work = NULL;
    int i;
    int rc = -1;
    int nb    = iparam[TIMING_NB];
    int m     = iparam[TIMING_N];
    int n     = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = m;
    /* An m-by-n LU factorization produces min(m,n) pivots. */
    int minmn = (m < n) ? m : n;
    /* Element count computed in size_t so lda*n cannot overflow int. */
    size_t nelem = (size_t)lda * (size_t)n;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate Data (A: LAPACK layout, AT: tile layout, ipiv: pivots) */
    A    = (PLASMA_Complex32_t *)malloc(nelem * sizeof(PLASMA_Complex32_t));
    AT   = (PLASMA_Complex32_t *)malloc(nelem * sizeof(PLASMA_Complex32_t));
    ipiv = (int *)malloc( (size_t)n * sizeof(int) );

    /* Check if unable to allocate memory */
    if ( (!AT) || (!A) || (!ipiv) ) {
        printf("Out of Memory \n ");
        goto cleanup;
    }

    /* Initialize Data: fill A with random values, then copy it into AT */
    LAPACKE_clarnv_work(1, ISEED, lda*n, A);

    PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, m, n);
    PLASMA_cLapack_to_Tile((void*)A, lda, descA);

    /* Factor a LAPACK-layout copy with the reference routine for the check */
    if ( check ) {
        A2    = (PLASMA_Complex32_t *)malloc(nelem * sizeof(PLASMA_Complex32_t));
        ipiv2 = (int *)malloc( (size_t)n * sizeof(int) );
        if ( (!A2) || (!ipiv2) ) {
            printf("Out of Memory \n ");
            goto cleanup;
        }

        LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda);
        LAPACKE_cgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 );
    }

    plasma = plasma_context_self();
    PLASMA_Sequence_Create(&sequence);
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
    QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] );

    plasma_dynamic_spawn();
    CORE_cgetrf_rectil_init();

    /* Timed region: task insertion plus waiting for the sequence */
    t = -cWtime();
    QUARK_CORE_cgetrf_rectil(plasma->quark, &task_flags,
                             *descA, AT, descA->mb*descA->nb, ipiv,
                             sequence, &request,
                             0, 0,
                             iparam[TIMING_THRDNBR]);
    PLASMA_Sequence_Wait(sequence);
    t += cWtime();
    *t_ = t;

    PLASMA_Sequence_Destroy(sequence);

    /* Check the solution */
    if ( check ) {
        work = (float *)malloc((size_t)max(m,n) * sizeof(float));
        if ( !work ) {
            printf("Out of Memory \n ");
            goto cleanup;
        }

        PLASMA_cTile_to_Lapack(descA, (void*)A, lda);

        /* Check ipiv: only the first min(m,n) entries are pivots, and only
         * those diagonal indices lie inside the lda-by-n buffers. Report the
         * first mismatch and stop. */
        for (i = 0; i < minmn; i++) {
            if ( ipiv[i] != ipiv2[i] ) {
                fprintf(stderr, "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n",
                        i, ipiv[i],  i, crealf(A[  i * lda + i ]),
                        i, ipiv2[i], i, crealf(A2[ i * lda + i ]));
                break;
            }
        }

        dparam[TIMING_ANORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                   m, n, A, lda, work);
        dparam[TIMING_XNORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                   m, n, A2, lda, work);
        dparam[TIMING_BNORM] = 0.0;

        /* A2 <- A2 - A, then take the max norm of the difference */
        CORE_caxpy( m, n, -1.0, A, lda, A2, lda);

        dparam[TIMING_RES] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                 m, n, A2, lda, work);
    }

    rc = 0;

cleanup:
    /* Deallocate Workspace; free(NULL) is a no-op so partial setups are fine */
    if ( descA != NULL ) {
        PLASMA_Desc_Destroy(&descA);
    }
    free( work );
    free( A2 );
    free( ipiv2 );
    free( A );
    free( AT );
    free( ipiv );
    PLASMA_Finalize();

    return rc;
}
/*
 * Time one PLASMA_cgesv_Tile call on an n-by-n single-precision complex
 * system with nrhs right-hand sides and, when requested, check the solution
 * with c_check_solution.
 *
 * iparam  Integer settings; this function reads TIMING_N, TIMING_NB,
 *         TIMING_IB, TIMING_NRHS, TIMING_CHECK, TIMING_SCHEDULER and
 *         TIMING_THRDNBR.
 * dparam  When iparam[TIMING_CHECK] is non-zero, TIMING_RES receives the
 *         value returned by c_check_solution, which is also handed pointers
 *         to TIMING_ANORM, TIMING_BNORM and TIMING_XNORM. Not written
 *         otherwise.
 * t_      Receives the elapsed time measured with cWtime() around
 *         PLASMA_cgesv_Tile.
 *
 * Returns 0 on success, -1 if a buffer could not be allocated. Every exit
 * path releases whatever was allocated and calls PLASMA_Finalize().
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    PLASMA_Complex32_t *AT = NULL, *bT = NULL, *x = NULL;
    PLASMA_Complex32_t *A = NULL;
    PLASMA_Complex32_t *b = NULL;
    PLASMA_desc *descA = NULL, *descB = NULL;
    real_Double_t t;
    int *piv = NULL;
    int rc = -1;
    int n     = iparam[TIMING_N];
    int nb    = iparam[TIMING_NB];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = n;
    int ldb   = n;
    /* Element counts computed in size_t so the products cannot overflow int. */
    size_t a_elems = (size_t)lda * (size_t)n;
    size_t b_elems = (size_t)ldb * (size_t)nrhs;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled; tile sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate Data */
    AT  = (PLASMA_Complex32_t *)malloc(a_elems * sizeof(PLASMA_Complex32_t));
    bT  = (PLASMA_Complex32_t *)malloc(b_elems * sizeof(PLASMA_Complex32_t));
    piv = (int *)malloc( (size_t)n * sizeof(int));

    /* Check if unable to allocate memory */
    if ( (!AT) || (!bT) || (!piv) ) {
        printf("Out of Memory \n ");
        goto cleanup;
    }

    /* Describe AT and bT as tiled matrices and fill them with random values */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaComplexFloat, nb, nb, nb*nb, ldb, nrhs, 0, 0, n, nrhs);
    LAPACKE_clarnv_work(1, ISEED, lda*n,    AT);
    LAPACKE_clarnv_work(1, ISEED, ldb*nrhs, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (PLASMA_Complex32_t *)malloc(a_elems * sizeof(PLASMA_Complex32_t));
        b = (PLASMA_Complex32_t *)malloc(b_elems * sizeof(PLASMA_Complex32_t));
        if ( (!A) || (!b) ) {
            printf("Out of Memory \n ");
            goto cleanup;
        }
        PLASMA_cTile_to_Lapack(descA, (void*)A, lda);
        PLASMA_cTile_to_Lapack(descB, (void*)b, ldb);
    }

    /* Timed region: the solve only */
    t = -cWtime();
    PLASMA_cgesv_Tile( descA, piv, descB );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        x = (PLASMA_Complex32_t *)malloc(b_elems * sizeof(PLASMA_Complex32_t));
        if ( !x ) {
            printf("Out of Memory \n ");
            goto cleanup;
        }

        /* descB is read back after the solve; x uses the same leading
         * dimension (ldb) it is sized with and checked with below. */
        PLASMA_cTile_to_Lapack(descB, (void*)x, ldb);

        dparam[TIMING_RES] = c_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
    }

    rc = 0;

cleanup:
    /* free(NULL) is a no-op, so partially completed setups are handled too */
    free( A );
    free( b );
    free( x );
    if ( descA != NULL ) {
        PLASMA_Desc_Destroy(&descA);
    }
    if ( descB != NULL ) {
        PLASMA_Desc_Destroy(&descB);
    }
    free( AT );
    free( bT );
    free( piv );
    PLASMA_Finalize();

    return rc;
}