static int RunTest(int *iparam, double *dparam, real_Double_t *t_) { double *AT; real_Double_t t; PLASMA_desc *descA; int nb, nb2, nt; int n = iparam[TIMING_N]; int check = iparam[TIMING_CHECK]; PLASMA_enum uplo = PlasmaLower; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* } */ nb = iparam[TIMING_NB]; nb2 = nb * nb; nt = n / nb + ((n % nb == 0) ? 0 : 1); /* Allocate Data */ AT = (double *)malloc(nt*nt*nb2*sizeof(double)); /* Check if unable to allocate memory */ if ( !AT ){ printf("Out of Memory \n "); exit(0); } /* * Initialize Data * It's done in static to avoid having the same sequence than one * the function we want to trace */ PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n); PLASMA_dplgsy_Tile( (double)n, descA, 51 ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /* Save AT in lapack layout for check */ if ( check ) { } /* PLASMA DPOTRF / DTRTRI / DLAUUM */ /* * Example of the different way to combine several asynchonous calls */ { #if defined(TRACE_BY_SEQUENCE) PLASMA_sequence *sequence[3]; PLASMA_request request[3] = { PLASMA_REQUEST_INITIALIZER, PLASMA_REQUEST_INITIALIZER, PLASMA_REQUEST_INITIALIZER }; PLASMA_Sequence_Create(&sequence[0]); PLASMA_Sequence_Create(&sequence[1]); PLASMA_Sequence_Create(&sequence[2]); t = -cWtime(); #if defined(POTRI_SYNC) PLASMA_dpotrf_Tile_Async(uplo, descA, sequence[0], &request[0]); PLASMA_Sequence_Wait(sequence[0]); PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]); PLASMA_Sequence_Wait(sequence[1]); PLASMA_dlauum_Tile_Async(uplo, descA, sequence[2], &request[2]); PLASMA_Sequence_Wait(sequence[2]); #else PLASMA_dpotrf_Tile_Async(uplo, descA, sequence[0], &request[0]); PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]); PLASMA_dlauum_Tile_Async(uplo, descA, sequence[2], &request[2]); PLASMA_Sequence_Wait(sequence[0]); PLASMA_Sequence_Wait(sequence[1]); PLASMA_Sequence_Wait(sequence[2]); #endif t += cWtime(); PLASMA_Sequence_Destroy(sequence[0]); PLASMA_Sequence_Destroy(sequence[1]); PLASMA_Sequence_Destroy(sequence[2]); #else #if defined(POTRI_SYNC) t = -cWtime(); PLASMA_dpotrf_Tile(uplo, descA); PLASMA_dtrtri_Tile(uplo, PlasmaNonUnit, descA); PLASMA_dlauum_Tile(uplo, descA); t += cWtime(); #else /* Default: we use Asynchonous call with only one sequence */ PLASMA_sequence *sequence; PLASMA_request request[2] = { PLASMA_REQUEST_INITIALIZER, PLASMA_REQUEST_INITIALIZER }; t = -cWtime(); PLASMA_Sequence_Create(&sequence); PLASMA_dpotrf_Tile_Async(uplo, descA, sequence, &request[0]); PLASMA_dpotri_Tile_Async(uplo, descA, sequence, &request[1]); PLASMA_Sequence_Wait(sequence); t += cWtime(); PLASMA_Sequence_Destroy(sequence); #endif #endif *t_ = t; } /* Check the solution */ if ( check ) { dparam[TIMING_ANORM] = 0.0; dparam[TIMING_XNORM] = 0.0; dparam[TIMING_BNORM] = 0.0; dparam[TIMING_RES] = 0.0; } PLASMA_Desc_Destroy(&descA); PLASMA_Finalize(); free(AT); return 0; }
static int RunTest(int *iparam, double *dparam, real_Double_t *t_) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; PLASMA_Complex64_t *A, *A2 = NULL; real_Double_t t; int *ipiv, *ipiv2 = NULL; int i; int m = iparam[TIMING_N]; int n = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = m; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* Allocate Data */ A = (PLASMA_Complex64_t *)malloc(lda*n*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ( (! A) ) { printf("Out of Memory \n "); return -1; } /* Initialiaze Data */ LAPACKE_zlarnv_work(1, ISEED, lda*n, A); /* Allocate Workspace */ ipiv = (int *)malloc( n*sizeof(int) ); /* Save A in lapack layout for check */ if ( check ) { A2 = (PLASMA_Complex64_t *)malloc(lda*n*sizeof(PLASMA_Complex64_t)); ipiv2 = (int *)malloc( n*sizeof(int) ); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda); LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 ); } plasma = plasma_context_self(); PLASMA_Sequence_Create(&sequence); QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] ); plasma_dynamic_spawn(); CORE_zgetrf_reclap_init(); t = -cWtime(); QUARK_CORE_zgetrf_reclap(plasma->quark, &task_flags, m, n, n, A, lda, ipiv, sequence, &request, 0, 0, iparam[TIMING_THRDNBR]); PLASMA_Sequence_Wait(sequence); t += cWtime(); *t_ = t; PLASMA_Sequence_Destroy(sequence); /* Check the solution */ if ( check ) { double *work = (double *)malloc(max(m,n)*sizeof(double)); /* Check ipiv */ for(i=0; i<n; i++) { if( ipiv[i] != ipiv2[i] ) { fprintf(stderr, "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n", i, ipiv[i], i, creal(A[ i * lda + i ]), i, ipiv2[i], i, creal(A2[ i * lda + i ])); break; } } dparam[TIMING_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A, lda, work); dparam[TIMING_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); dparam[TIMING_BNORM] = 0.0; CORE_zaxpy( m, n, -1.0, A, lda, A2, lda); dparam[TIMING_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); free( A2 ); free( ipiv2 ); free( work ); } free( A ); free( ipiv ); PLASMA_Finalize(); return 0; }