/*
 * Timing driver for PLASMA_zgetrf_nopiv_Tile.
 *
 * Builds an M-by-N tile matrix, boosts the diagonal of each diagonal tile,
 * times the factorization and, when a check is requested and M == N, solves
 * a system with the computed factors and stores the residual and norms.
 *
 *   iparam  input parameters, unpacked by PASTE_CODE_IPARAM_LOCALS
 *   dparam  receives IPARAM_RES, IPARAM_ANORM, IPARAM_BNORM and IPARAM_XNORM
 *           when the check runs
 *   t_      not referenced directly here; presumably written by the timing
 *           macros -- TODO confirm against their definition
 *
 * Returns 0 at the end of the function.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PASTE_CODE_IPARAM_LOCALS( iparam );

    if ( M != N && check ) {
        fprintf(stderr, "Check cannot be perfomed with M != N\n");
        check = 0;
    }

    /* Allocate Data */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, N );
    PLASMA_zplrnt_Tile(descA, 3456);

    /*
     * Add max(M,N) to the diagonal entries of every diagonal tile
     * (presumably to keep the unpivoted factorization well behaved).
     * NOTE(review): the loop visits tile (m,m) for every m < MT and ldam
     * entries per tile; when M != N this may step outside the matrix --
     * confirm against the descriptor layout.
     */
    {
        PLASMA_Complex64_t *Amat;
        int m, i, ldam;

        for (m = 0; m < MT; m++) {
            ldam = BLKLDD( *descA, m );
            Amat = (PLASMA_Complex64_t*)plasma_getaddr(*descA, m, m);
            for (i = 0; i < ldam; i++) {
                Amat[i*ldam+i] += max(M,N);
            }
        }
    }

    /* Save AT in lapack layout for check */
    PASTE_TILE_TO_LAPACK( descA, A, check, PLASMA_Complex64_t, LDA, N );

    START_TIMING();
    PLASMA_zgetrf_nopiv_Tile( descA );
    STOP_TIMING();

    /* Check the solution */
    if ( check ) {
        PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDB, N, NRHS );
        PLASMA_zplrnt_Tile( descB, 7732 );
        PASTE_TILE_TO_LAPACK( descB, b, check, PLASMA_Complex64_t, LDB, NRHS );

        /* Forward then backward substitution with the factors held in descA */
        PLASMA_ztrsm_Tile( PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaUnit, 1.0, descA, descB );
        PLASMA_ztrsm_Tile( PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit, 1.0, descA, descB );

        PASTE_TILE_TO_LAPACK( descB, x, check, PLASMA_Complex64_t, LDB, NRHS );

        dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, A, LDA, b, x, LDB,
                                              &(dparam[IPARAM_ANORM]),
                                              &(dparam[IPARAM_BNORM]),
                                              &(dparam[IPARAM_XNORM]));

        /* Release the right-hand-side tile matrix; it was previously leaked */
        PASTE_CODE_FREE_MATRIX( descB );
        free(A);
        free(b);
        free(x);
    }

    PASTE_CODE_FREE_MATRIX( descA );

    return 0;
}
/*
 * Timing driver for PLASMA_zgeqrf_Tile.
 *
 * Factorizes an M-by-N tile matrix and, when a check is requested and
 * M == N, solves a system with PLASMA_zgeqrs_Tile and records the residual
 * and norms in dparam.
 *
 *   iparam  input parameters, unpacked by PASTE_CODE_IPARAM_LOCALS
 *   dparam  receives IPARAM_RES, IPARAM_ANORM, IPARAM_BNORM and IPARAM_XNORM
 *           when the check runs
 *   t_      not referenced directly here; presumably written by the timing
 *           macros -- TODO confirm
 *
 * Returns 0 at the end of the function.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_desc *qrWorkspace;
    PASTE_CODE_IPARAM_LOCALS( iparam );

    /* Matrix to factorize */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, N );
    PLASMA_zplrnt_Tile( descA, 5373 );

    /* Keep an untouched copy of A; only needed when the check will run */
    PASTE_TILE_TO_LAPACK( descA, A, ( check && M == N ), PLASMA_Complex64_t, LDA, N );

    /* Right-hand side, allocated under the same condition */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, (check && M == N), PLASMA_Complex64_t, PlasmaComplexDouble, LDB, M, NRHS );

    /* Workspace used by the factorization and the solve */
    PLASMA_Alloc_Workspace_zgels_Tile(M, N, &qrWorkspace);

    /* Timed region */
    START_TIMING();
    PLASMA_zgeqrf_Tile( descA, qrWorkspace );
    STOP_TIMING();

    /* Verification */
    if ( M == N && check )
    {
        double residual;

        /* Generate B and keep a copy of it */
        PLASMA_zplrnt_Tile( descB, 2264 );
        PASTE_TILE_TO_LAPACK( descB, B, 1, PLASMA_Complex64_t, LDB, NRHS );

        /* Solve; the result lands in descB */
        PLASMA_zgeqrs_Tile( descA, qrWorkspace, descB );

        /* Extract the solution as X */
        PASTE_TILE_TO_LAPACK( descB, X, 1, PLASMA_Complex64_t, LDB, NRHS );

        residual = z_check_solution(M, N, NRHS, A, LDA, B, X, LDB,
                                    &(dparam[IPARAM_ANORM]),
                                    &(dparam[IPARAM_BNORM]),
                                    &(dparam[IPARAM_XNORM]));
        dparam[IPARAM_RES] = residual;

        /* Release everything that was only needed for the check */
        PASTE_CODE_FREE_MATRIX( descB );
        free( X );
        free( B );
        free( A );
    }

    /* Release workspace and matrix */
    PLASMA_Dealloc_Handle_Tile(&qrWorkspace);
    PASTE_CODE_FREE_MATRIX( descA );

    return 0;
}
/*
 * Timing driver for PLASMA_zgeqrf on a matrix in LAPACK layout.
 *
 * The check is disabled (with a message on stderr) when M != N. When it
 * runs, a system is solved with PLASMA_zgeqrs and the residual and norms
 * are stored in dparam.
 *
 *   iparam  input parameters, unpacked by PASTE_CODE_IPARAM_LOCALS
 *   dparam  receives IPARAM_RES, IPARAM_ANORM, IPARAM_BNORM and IPARAM_XNORM
 *           when the check runs
 *   t_      not referenced directly here; presumably written by the timing
 *           macros -- TODO confirm
 *
 * Returns 0 at the end of the function.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_desc *qrWorkspace;
    PASTE_CODE_IPARAM_LOCALS( iparam );

    /* The solution check is only available for square problems */
    if ( check && M != N )
    {
        fprintf(stderr, "Check cannot be perfomed with M != N\n");
        check = 0;
    }

    /* Matrix to factorize, filled by PLASMA_zplrnt */
    PASTE_CODE_ALLOCATE_MATRIX( A, 1, PLASMA_Complex64_t, LDA, N );
    PLASMA_zplrnt(M, N, A, LDA, 3456);

    /* Workspace used by the factorization and the solve */
    PLASMA_Alloc_Workspace_zgels(M, N, &qrWorkspace);

    /* Copy of A taken before it is overwritten; only made when checking */
    PASTE_CODE_ALLOCATE_COPY( Acpy, check, PLASMA_Complex64_t, A, LDA, N );

    /* Timed region */
    START_TIMING();
    PLASMA_zgeqrf( M, N, A, LDA, qrWorkspace );
    STOP_TIMING();

    /* Verification */
    if ( check )
    {
        double residual;

        /* X starts as the right-hand side; B keeps a copy of it */
        PASTE_CODE_ALLOCATE_MATRIX( X, 1, PLASMA_Complex64_t, LDB, NRHS );
        PLASMA_zplrnt( N, NRHS, X, LDB, 5673 );
        PASTE_CODE_ALLOCATE_COPY( B, 1, PLASMA_Complex64_t, X, LDB, NRHS );

        PLASMA_zgeqrs(M, N, NRHS, A, LDA, qrWorkspace, X, LDB);

        residual = z_check_solution(M, N, NRHS, Acpy, LDA, B, X, LDB,
                                    &(dparam[IPARAM_ANORM]),
                                    &(dparam[IPARAM_BNORM]),
                                    &(dparam[IPARAM_XNORM]));
        dparam[IPARAM_RES] = residual;

        free( X );
        free( B );
        free( Acpy );
    }

    /* Release workspace and matrix */
    PLASMA_Dealloc_Handle_Tile( &qrWorkspace );
    free( A );

    return 0;
}
/*
 * Timing driver for PLASMA_zgesv_incpiv on matrices in LAPACK layout.
 *
 * Only square problems are supported: when M != N a message is printed on
 * stderr and -1 is returned before anything is allocated. When a check is
 * requested, the residual and norms are stored in dparam.
 *
 *   iparam  input parameters, unpacked by PASTE_CODE_IPARAM_LOCALS
 *   dparam  receives IPARAM_RES, IPARAM_ANORM, IPARAM_BNORM and IPARAM_XNORM
 *           when the check runs
 *   t_      not referenced directly here; presumably written by the timing
 *           macros -- TODO confirm
 *
 * Returns -1 when M != N, otherwise 0 at the end of the function.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_desc *luWorkspace;
    int *pivots;
    PASTE_CODE_IPARAM_LOCALS( iparam );

    /* Reject rectangular problems up front */
    if ( M != N )
    {
        fprintf(stderr, "This timing works only with M == N\n");
        return -1;
    }

    /* System matrix and right-hand side (X is overwritten by the solve) */
    PASTE_CODE_ALLOCATE_MATRIX( A, 1, PLASMA_Complex64_t, LDA, N );
    PASTE_CODE_ALLOCATE_MATRIX( X, 1, PLASMA_Complex64_t, LDB, NRHS );

    /* Fill both with PLASMA_zplrnt */
    PLASMA_zplrnt( N, N, A, LDA, 51 );
    PLASMA_zplrnt( N, NRHS, X, LDB, 5673 );

    /* Workspace and pivot storage for the solver */
    PLASMA_Alloc_Workspace_zgesv_incpiv(N, &luWorkspace, &pivots);

    /* Copies of A and the right-hand side; only made when checking */
    PASTE_CODE_ALLOCATE_COPY( Acpy, check, PLASMA_Complex64_t, A, LDA, N );
    PASTE_CODE_ALLOCATE_COPY( B, check, PLASMA_Complex64_t, X, LDB, NRHS );

    /* Timed region */
    START_TIMING();
    PLASMA_zgesv_incpiv( N, NRHS, A, LDA, luWorkspace, pivots, X, LDB );
    STOP_TIMING();

    /* Verification */
    if ( check )
    {
        double residual = z_check_solution(N, N, NRHS, Acpy, LDA, B, X, LDB,
                                           &(dparam[IPARAM_ANORM]),
                                           &(dparam[IPARAM_BNORM]),
                                           &(dparam[IPARAM_XNORM]));
        dparam[IPARAM_RES] = residual;

        free(B);
        free(Acpy);
    }

    /* Release workspace, pivots and matrices */
    PLASMA_Dealloc_Handle_Tile( &luWorkspace );
    free( pivots );
    free( X );
    free( A );

    return 0;
}
/*
 * Timing driver for PLASMA_zgetrf_tntpiv_Tile.
 *
 * The check is disabled (with a message on stderr) when M != N. When it
 * runs, a system is solved with PLASMA_zgetrs_Tile and the residual and
 * norms are stored in dparam.
 *
 *   iparam  input parameters, unpacked by PASTE_CODE_IPARAM_LOCALS
 *   dparam  receives IPARAM_RES, IPARAM_ANORM, IPARAM_BNORM and IPARAM_XNORM
 *           when the check runs
 *   t_      not referenced directly here; presumably written by the timing
 *           macros -- TODO confirm
 *
 * Returns 0 at the end of the function.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PASTE_CODE_IPARAM_LOCALS( iparam );

    /* The solution check is only available for square problems */
    if ( check && M != N )
    {
        fprintf(stderr, "Check cannot be perfomed with M != N\n");
        check = 0;
    }

    /* Tile matrix to factorize and min(M,N) pivot indices */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, N );
    PASTE_CODE_ALLOCATE_MATRIX( piv, 1, int, min(M, N), 1 );
    PLASMA_zplrnt_Tile(descA, 3456);

    /* LAPACK-layout copy of the matrix taken before the factorization */
    PASTE_TILE_TO_LAPACK( descA, A, check, PLASMA_Complex64_t, LDA, N );

    /* Timed region */
    START_TIMING();
    PLASMA_zgetrf_tntpiv_Tile( descA, piv );
    STOP_TIMING();

    /* Verification */
    if ( check )
    {
        double residual;

        /* Right-hand side, with a LAPACK-layout copy kept as b */
        PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDB, N, NRHS );
        PLASMA_zplrnt_Tile( descB, 7732 );
        PASTE_TILE_TO_LAPACK( descB, b, check, PLASMA_Complex64_t, LDB, NRHS );

        /* Solve with the factors; the result lands in descB */
        PLASMA_zgetrs_Tile( PlasmaNoTrans, descA, piv, descB );

        /* Extract the solution as x */
        PASTE_TILE_TO_LAPACK( descB, x, check, PLASMA_Complex64_t, LDB, NRHS );

        residual = z_check_solution(M, N, NRHS, A, LDA, b, x, LDB,
                                    &(dparam[IPARAM_ANORM]),
                                    &(dparam[IPARAM_BNORM]),
                                    &(dparam[IPARAM_XNORM]));
        dparam[IPARAM_RES] = residual;

        PASTE_CODE_FREE_MATRIX( descB );
        free(x);
        free(b);
        free(A);
    }

    /* Release pivots and matrix */
    free( piv );
    PASTE_CODE_FREE_MATRIX( descA );

    return 0;
}
/*
 * Timing driver for PLASMA_zgetrf_incpiv_Tile (self-contained variant that
 * initializes and finalizes PLASMA itself).
 *
 *   iparam  reads TIMING_N, TIMING_NRHS, TIMING_CHECK, TIMING_THRDNBR,
 *           TIMING_SCHEDULER, TIMING_NB and TIMING_IB
 *   dparam  receives TIMING_RES, TIMING_ANORM, TIMING_BNORM and TIMING_XNORM
 *           when the check runs
 *   t_      receives the elapsed time of the factorization, measured with
 *           cWtime()
 *
 * Returns 0. On allocation failure the function prints "Out of Memory" and
 * calls exit(0), which is the policy this driver already used for its main
 * buffer.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *A = NULL, *AT = NULL, *b = NULL, *bT = NULL, *x = NULL;
    PLASMA_desc *descA, *descB, *descL;
    real_Double_t t;
    int *piv;
    int nb, nb2, nt, ntrhs;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: tile sizes come from iparam */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    /*
     * Number of tile columns needed by the right-hand side. Never below 1,
     * so the buffer is at least as large as the nt*nb2 used previously.
     */
    ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
    if ( ntrhs < 1 )
        ntrhs = 1;

    /* Allocate Data (size computed in size_t to avoid int overflow) */
    AT = (PLASMA_Complex64_t *)malloc((size_t)nt * nt * nb2 * sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( !AT ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_zlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_zgesv_incpiv_Tile(n, &descL, &piv);

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (PLASMA_Complex64_t *)malloc((size_t)lda * n * sizeof(PLASMA_Complex64_t));
        if ( !A ) {
            printf("Out of Memory \n ");
            exit(0);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_zgetrf_incpiv_Tile( descA, descL, piv );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        b  = (PLASMA_Complex64_t *)malloc((size_t)ldb * nrhs * sizeof(PLASMA_Complex64_t));
        /*
         * The tile buffer must cover every tile column of the n-by-nrhs
         * descriptor; sizing it for a single tile column overflowed when
         * nrhs > nb.
         */
        bT = (PLASMA_Complex64_t *)malloc((size_t)nt * ntrhs * nb2 * sizeof(PLASMA_Complex64_t));
        x  = (PLASMA_Complex64_t *)malloc((size_t)ldb * nrhs * sizeof(PLASMA_Complex64_t));
        if ( !b || !bT || !x ) {
            printf("Out of Memory \n ");
            exit(0);
        }

        LAPACKE_zlarnv_work(1, ISEED, n*nrhs, b);
        PLASMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Lapack_to_Tile((void*)b, n, descB);

        PLASMA_zgetrs_incpiv_Tile( descA, descL, piv, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
    }

    /* Deallocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descL);

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    free( piv );

    PLASMA_Finalize();

    return 0;
}
/*
 * Timing driver for PLASMA_zposv on matrices in LAPACK layout
 * (self-contained variant that initializes and finalizes PLASMA itself).
 *
 *   iparam  reads TIMING_N, TIMING_NRHS, TIMING_CHECK, TIMING_THRDNBR,
 *           TIMING_SCHEDULER and TIMING_NB
 *   dparam  receives TIMING_RES, TIMING_ANORM, TIMING_BNORM and TIMING_XNORM
 *           when the check runs
 *   t_      receives the elapsed time of the solve, measured with cWtime()
 *
 * Returns 0. On allocation failure the function prints "Out of Memory" and
 * calls exit(0), which is the policy this driver already used for its main
 * buffers.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *A, *Acpy = NULL, *b = NULL, *x;
    real_Double_t t;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Allocate Data (sizes computed in size_t to avoid int overflow) */
    A = (PLASMA_Complex64_t *)malloc((size_t)lda * n    * sizeof(PLASMA_Complex64_t));
    x = (PLASMA_Complex64_t *)malloc((size_t)ldb * nrhs * sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( (!A) || (!x) ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: the tile size comes from iparam */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    /* Initialize Data */
    PLASMA_zplghe((double)n, n, A, lda, 51 );
    LAPACKE_zlarnv_work(1, ISEED, n*nrhs, x);

    /* Save A and b: PLASMA_zposv is given A and x to work on in place */
    if (check) {
        Acpy = (PLASMA_Complex64_t *)malloc((size_t)lda * n    * sizeof(PLASMA_Complex64_t));
        b    = (PLASMA_Complex64_t *)malloc((size_t)ldb * nrhs * sizeof(PLASMA_Complex64_t));
        /* These copies were previously used without checking the allocation */
        if ( (!Acpy) || (!b) ) {
            printf("Out of Memory \n ");
            exit(0);
        }
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', n, n,    A, lda, Acpy, lda);
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, x, ldb, b,    ldb);
    }

    /* PLASMA ZPOSV */
    t = -cWtime();
    PLASMA_zposv(PlasmaUpper, n, nrhs, A, lda, x, ldb);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, Acpy, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        free(Acpy);
        free(b);
    }

    free(A);
    free(x);

    PLASMA_Finalize();

    return 0;
}
/*
 * Timing driver for PLASMA_zgesv_Tile (self-contained variant that
 * initializes and finalizes PLASMA itself).
 *
 *   iparam  reads TIMING_N, TIMING_NB, TIMING_NRHS, TIMING_CHECK,
 *           TIMING_THRDNBR, TIMING_SCHEDULER and TIMING_IB
 *   dparam  receives TIMING_RES, TIMING_ANORM, TIMING_BNORM and TIMING_XNORM
 *           when the check runs
 *   t_      receives the elapsed time of the solve, measured with cWtime()
 *
 * Returns 0 on success and -1 when an allocation fails. Every exit path
 * releases what was allocated and finalizes PLASMA.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *AT, *bT;
    PLASMA_Complex64_t *A = NULL;
    PLASMA_Complex64_t *b = NULL;
    PLASMA_Complex64_t *x = NULL;
    PLASMA_desc        *descA, *descB;
    real_Double_t       t;
    int                *piv;
    int rc    = 0;
    int n     = iparam[TIMING_N];
    int nb    = iparam[TIMING_NB];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: tile sizes come from iparam */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate Data (sizes computed in size_t to avoid int overflow) */
    AT  = (PLASMA_Complex64_t *)malloc((size_t)lda * n    * sizeof(PLASMA_Complex64_t));
    bT  = (PLASMA_Complex64_t *)malloc((size_t)ldb * nrhs * sizeof(PLASMA_Complex64_t));
    piv = (int *)malloc( (size_t)n * sizeof(int) );

    /* Check if unable to allocate memory */
    if ( (!AT) || (!bT) || (!piv) ) {
        printf("Out of Memory \n ");
        /* Release whichever buffers did succeed and shut PLASMA down;
         * the descriptors do not exist yet on this path. */
        free( AT );
        free( bT );
        free( piv );
        PLASMA_Finalize();
        return -1;
    }

    /* Describe AT and bT as tile matrices and fill them with random values */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, lda, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, ldb, nrhs, 0, 0, n, nrhs);
    LAPACKE_zlarnv_work(1, ISEED, lda*n,    AT);
    LAPACKE_zlarnv_work(1, ISEED, ldb*nrhs, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (PLASMA_Complex64_t *)malloc((size_t)lda * n    * sizeof(PLASMA_Complex64_t));
        b = (PLASMA_Complex64_t *)malloc((size_t)ldb * nrhs * sizeof(PLASMA_Complex64_t));
        if ( (!A) || (!b) ) {
            printf("Out of Memory \n ");
            rc = -1;
            goto cleanup;
        }
        PLASMA_zTile_to_Lapack(descA, (void*)A, lda);
        PLASMA_zTile_to_Lapack(descB, (void*)b, ldb);
    }

    t = -cWtime();
    PLASMA_zgesv_Tile( descA, piv, descB );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        x = (PLASMA_Complex64_t *)malloc((size_t)ldb * nrhs * sizeof(PLASMA_Complex64_t));
        if ( !x ) {
            printf("Out of Memory \n ");
            rc = -1;
            goto cleanup;
        }
        PLASMA_zTile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
    }

cleanup:
    /* free(NULL) is a no-op, so the check buffers can be released
     * unconditionally */
    free( A );
    free( b );
    free( x );

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);

    free( AT );
    free( bT );
    free( piv );

    PLASMA_Finalize();

    return rc;
}