int main () { int cores = 2; int N = 10 ; int LDA = 10 ; int info; int info_factorization; int i,j; int NminusOne = N-1; PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDA*sizeof(PLASMA_Complex32_t)); float *D = (float *)malloc(LDA*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialize */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 for Symmetric Positive Matrix */ LAPACKE_slarnv_work(IONE, ISEED, LDA, D); claghe(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++){ A1[LDA*i+i] = A1[LDA*i+i]+ (PLASMA_Complex32_t)N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Plasma routines */ PLASMA_cpotrf(PlasmaUpper, N, A2, LDA); /* Check the factorization */ info_factorization = check_factorization( N, A1, A2, LDA, PlasmaUpper); if ((info_factorization != 0)|(info != 0)) printf("-- Error in CPOTRF example ! \n"); else printf("-- Run of CPOTRF example successful ! \n"); free(A1); free(A2); free(WORK); free(D); PLASMA_Finalize(); exit(0); }
/*
 * Initialise PLASMA using settings taken from the environment.
 *
 * When HIPLAR_WITH_PLASMA is not defined this is a no-op that returns 0.
 * Otherwise:
 *   - R_PLASMA_NUM_THREADS (environment) selects the thread count stored in
 *     the variable of the same name; if it is unset, or does not start with
 *     an integer that fits in an int, one thread is used and a notice is
 *     printed.
 *   - R_PLASMA_SCHED (environment) equal to "STATIC" selects static
 *     scheduling (R_PLASMA_SCHED = 0); anything else selects dynamic
 *     scheduling (R_PLASMA_SCHED = 1).
 *   - The library version reported by PLASMA_Version() is compared with the
 *     PLASMA_VERSION_* compile-time values and a mismatch is reported.
 *
 * Returns the value returned by PLASMA_Init().
 */
int plasma_init()
{
#ifdef HIPLAR_WITH_PLASMA
    int info;
    const char *env;

    R_PLASMA_NUM_THREADS = 1;

    env = getenv("R_PLASMA_NUM_THREADS");
    if (env != NULL) {
        /* strtol, unlike atoi, lets us detect "no digits" and out-of-range */
        char *end = NULL;
        long requested = strtol(env, &end, 10);

        if (end == env || requested != (long)(int)requested) {
            Rprintf("Invalid value for R_PLASMA_NUM_THREADS: %s\n", env);
            Rprintf("Using one thread for PLASMA.\n");
        } else {
            R_PLASMA_NUM_THREADS = (int)requested;
        }
    } else {
        Rprintf("The envirnment variable R_PLASMA_NUM_THREADS is not set.\n");
        Rprintf("Using one thread for PLASMA.\nPlease set R_PLASMA_NUM_THREADS to the number of actual cores.\n\n");
    }

    /* Init PLASMA */
    info = PLASMA_Init(R_PLASMA_NUM_THREADS);

    env = getenv("R_PLASMA_SCHED");
    if ((env != NULL) && (strcmp(env, "STATIC") == 0)) {
        PLASMA_Set( PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );
        R_PLASMA_SCHED = 0;
    } else {
        PLASMA_Set( PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
        R_PLASMA_SCHED = 1;
    }

    /* Compare the headers used at build time with the linked library */
    PLASMA_Version(&R_PLASMA_MAJOR, &R_PLASMA_MINOR, &R_PLASMA_MICRO);
    if ((PLASMA_VERSION_MAJOR != R_PLASMA_MAJOR) ||
        (PLASMA_VERSION_MINOR != R_PLASMA_MINOR) ||
        (PLASMA_VERSION_MICRO != R_PLASMA_MICRO)) {
        Rprintf("ERROR: PLASMA version mismatch\n");
        Rprintf("ERROR: PLASMA version mismatch %d.%d.%d %d.%d.%d\n",
                PLASMA_VERSION_MAJOR, PLASMA_VERSION_MINOR, PLASMA_VERSION_MICRO,
                R_PLASMA_MAJOR, R_PLASMA_MINOR, R_PLASMA_MICRO);
    }

    return(info);
#else
    return 0;
#endif
}
/*
 * Time QUARK_CORE_zgetrf_reclap on a random m x n complex matrix.
 *
 * iparam supplies the sizes (m = TIMING_N, n = TIMING_NRHS), the thread
 * count, tile sizes and the TIMING_CHECK flag.  The elapsed time is stored
 * in *t_.  When checking is enabled the result is compared against
 * LAPACKE_zgetrf_work and the norms/residual are stored in dparam.
 *
 * Returns 0 on success and -1 if a buffer could not be allocated.  PLASMA is
 * finalized and every buffer released on both paths.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    PLASMA_Complex64_t *A = NULL, *A2 = NULL;
    real_Double_t t;
    int *ipiv = NULL, *ipiv2 = NULL;
    double *work = NULL;
    int i;
    int m     = iparam[TIMING_N];
    int n     = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = m;
    int npiv  = (m < n) ? m : n;   /* an LU factorization yields min(m,n) pivots */
    int rc    = -1;
    size_t nelem = (size_t)lda * (size_t)n;   /* computed in size_t to avoid int overflow */
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate data and pivot workspace */
    A    = (PLASMA_Complex64_t *)malloc(nelem*sizeof(PLASMA_Complex64_t));
    ipiv = (int *)malloc( (size_t)n*sizeof(int) );
    if ( (!A) || (!ipiv) ) {
        printf("Out of Memory \n ");
        goto cleanup;
    }

    /* Initialize data */
    LAPACKE_zlarnv_work(1, ISEED, lda*n, A);

    /* Save A in lapack layout and factor the copy with LAPACK for the check */
    if ( check ) {
        A2    = (PLASMA_Complex64_t *)malloc(nelem*sizeof(PLASMA_Complex64_t));
        ipiv2 = (int *)malloc( (size_t)n*sizeof(int) );
        if ( (!A2) || (!ipiv2) ) {
            printf("Out of Memory \n ");
            goto cleanup;
        }

        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda);
        LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 );
    }

    plasma = plasma_context_self();
    PLASMA_Sequence_Create(&sequence);
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
    QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] );

    plasma_dynamic_spawn();
    CORE_zgetrf_reclap_init();

    /* Timed region */
    t = -cWtime();
    QUARK_CORE_zgetrf_reclap(plasma->quark, &task_flags,
                             m, n, n,
                             A, lda, ipiv,
                             sequence, &request,
                             0, 0,
                             iparam[TIMING_THRDNBR]);
    PLASMA_Sequence_Wait(sequence);
    t += cWtime();
    *t_ = t;

    PLASMA_Sequence_Destroy(sequence);

    /* Check the solution */
    if ( check ) {
        work = (double *)malloc(max(m,n)*sizeof(double));
        if ( !work ) {
            printf("Out of Memory \n ");
            goto cleanup;
        }

        /* Compare only the pivots that were actually produced; report the first mismatch */
        for (i = 0; i < npiv; i++) {
            if ( ipiv[i] != ipiv2[i] ) {
                fprintf(stderr,
                        "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n",
                        i, ipiv[i],  i, creal(A[  i * lda + i ]),
                        i, ipiv2[i], i, creal(A2[ i * lda + i ]));
                break;
            }
        }

        dparam[TIMING_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                   m, n, A, lda, work);
        dparam[TIMING_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                   m, n, A2, lda, work);
        dparam[TIMING_BNORM] = 0.0;

        /* A2 <- A2 - A, then take its max norm as the residual */
        CORE_zaxpy( m, n, -1.0, A, lda, A2, lda);

        dparam[TIMING_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                 m, n, A2, lda, work);
    }

    rc = 0;

cleanup:
    free( work );
    free( ipiv2 );
    free( A2 );
    free( ipiv );
    free( A );
    PLASMA_Finalize();

    return rc;
}
/*
 * Time PLASMA_zposv on an n x n matrix produced by PLASMA_zplghe with nrhs
 * random right-hand sides.
 *
 * iparam supplies n (TIMING_N), nrhs (TIMING_NRHS), the thread count, the
 * scheduler choice, the tile size and the TIMING_CHECK flag.  The elapsed
 * time is stored in *t_.  When checking is enabled, z_check_solution() fills
 * the residual and norms in dparam.
 *
 * Returns 0.  The process exits with status 1 if memory cannot be allocated.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *A, *Acpy = NULL, *b = NULL, *x;
    real_Double_t       t;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    /* Byte counts are computed in size_t so large n does not overflow int */
    size_t a_bytes = (size_t)lda * (size_t)n    * sizeof(PLASMA_Complex64_t);
    size_t x_bytes = (size_t)ldb * (size_t)nrhs * sizeof(PLASMA_Complex64_t);

    /* Allocate Data */
    A = (PLASMA_Complex64_t *)malloc(a_bytes);
    x = (PLASMA_Complex64_t *)malloc(x_bytes);

    /* Check if unable to allocate memory */
    if ( (!A) || (!x) ) {
        printf("Out of Memory \n ");
        exit(1);
    }

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    /* Initialize Data */
    PLASMA_zplghe((double)n, n, A, lda, 51 );
    LAPACKE_zlarnv_work(1, ISEED, n*nrhs, x);

    /* Save A and b: PLASMA_zposv receives A and x, so keep copies for the check */
    if (check) {
        Acpy = (PLASMA_Complex64_t *)malloc(a_bytes);
        b    = (PLASMA_Complex64_t *)malloc(x_bytes);
        if ( (!Acpy) || (!b) ) {
            printf("Out of Memory \n ");
            PLASMA_Finalize();
            exit(1);
        }
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', n, n,    A, lda, Acpy, lda);
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, x, ldb, b,    ldb);
    }

    /* PLASMA ZPOSV */
    t = -cWtime();
    PLASMA_zposv(PlasmaUpper, n, nrhs, A, lda, x, ldb);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, Acpy, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        free(Acpy); free(b);
    }

    free(A); free(x);

    PLASMA_Finalize();

    return 0;
}
int main () { int cores = 2; int N = 10 ; int LDA = 10 ; int NRHS = 5 ; int LDB = 10 ; int info; int info_solution; int i,j; int NminusOne = N-1; int LDBxNRHS = LDB*NRHS; PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDA*sizeof(PLASMA_Complex32_t)); float *D = (float *)malloc(LDA*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)) { printf("Out of Memory \n "); exit(0); } /* Plasma Initialize */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 for Symmetric Positive Matrix */ LAPACKE_slarnv_work(IONE, ISEED, LDA, D); claghe(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++) { A1[LDA*i+i] = A1[LDA*i+i]+ (PLASMA_Complex32_t)N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Initialize B1 and B2 */ LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA routines */ info = PLASMA_cpotrf(PlasmaLower, N, A2, LDA); info = PLASMA_ctrsm(PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB); info = PLASMA_ctrsm(PlasmaLeft, PlasmaLower, PlasmaConjTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in CTRSM example ! \n"); else printf("-- Run of CTRSM example successful ! 
\n"); free(A1); free(A2); free(B1); free(B2); free(WORK); free(D); PLASMA_Finalize(); exit(0); }
int main( int argc, char *argv[] ) { int i, j, size, n_threads, n_repeats, n_trials, nb_alg, increment, begin; FLA_Datatype datatype = FLA_DOUBLE; FLA_Obj A; double b_norm_value = 0.0, dtime, *dtimes, *flops, *T; char output_file_m[100]; FILE *fpp; fprintf( stdout, "%c Enter number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' ); scanf( "%d%d%d", &begin, &increment, &n_trials ); fprintf( stdout, "%c %d %d %d\n", '%', begin, increment, n_trials ); fprintf( stdout, "%c Enter number of threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d\n\n", '%', n_threads ); sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%\n" ); fprintf( fpp, "%% | Matrix Size | PLASMA |\n" ); fprintf( fpp, "%% | n x n | GFlops |\n" ); fprintf( fpp, "%% -----------------------------\n" ); FLA_Init(); PLASMA_Init( n_threads ); PLASMA_Disable( PLASMA_AUTOTUNING ); PLASMA_Set( PLASMA_TILE_SIZE, nb_alg ); PLASMA_Set( PLASMA_INNER_BLOCK_SIZE, nb_alg / 4 ); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); fprintf( fpp, "%s = [\n", OUTPUT_FILE ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); for ( j = 0; j < n_repeats; j++ ) { FLA_Random_matrix( A ); PLASMA_Alloc_Workspace_dgeqrf( size, size, &T ); dtime = FLA_Clock(); PLASMA_dgeqrf( size, size, FLA_Obj_buffer_at_view( A ), size, T ); dtime = FLA_Clock() - dtime; dtimes[j] = dtime; free( T ); } dtime = dtimes[0]; for ( j = 1; j < n_repeats; j++ ) dtime = min( dtime, dtimes[j] ); flops[i] = 4.0 / 3.0 * size * size * size / dtime / 1e9; fprintf( fpp, " %d %6.3f\n", size, flops[i] ); 
printf( "Time: %e | GFlops: %6.3f\n", dtime, flops[i] ); printf( "Matrix size: %d x %d | nb_alg: %d\n", size, size, nb_alg ); printf( "Norm of difference: %le\n\n", b_norm_value ); FLA_Obj_free( &A ); } fprintf( fpp, "];\n" ); fflush( fpp ); fclose( fpp ); FLA_free( dtimes ); FLA_free( flops ); PLASMA_Finalize(); FLA_Finalize(); return 0; }
/*
 * Time PLASMA_dgeqrf_Tile on a random n x n matrix held in nb x nb tiles.
 *
 * iparam supplies n (TIMING_N), nrhs (TIMING_NRHS), the thread count, the
 * scheduler choice, tile sizes and the TIMING_CHECK flag.  The elapsed time
 * is stored in *t_.  When checking is enabled a random right-hand side is
 * solved with PLASMA_dgeqrs_Tile and d_check_solution() fills the residual
 * and norms in dparam.
 *
 * Returns 0.  The process exits with status 1 if memory cannot be allocated.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A = NULL, *AT, *b = NULL, *bT = NULL, *x = NULL;
    PLASMA_desc *descA, *descB, *descT;
    real_Double_t t;
    int nb, nb2, nt, ntrhs;
    size_t at_elems;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

#if defined(PLASMA_CUDA)
    core_cublas_init();
#endif

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    /* Number of tile rows/columns needed to cover n, rounding up */
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    /* Number of tile columns needed to cover nrhs (at least one) */
    ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
    if ( ntrhs < 1 )
        ntrhs = 1;
    /* Element count computed in size_t so large n does not overflow int */
    at_elems = (size_t)nt * (size_t)nt * (size_t)nb2;

    /* Allocate Data */
    AT = (double *)malloc(at_elems*sizeof(double));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(1);
    }

#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)AT, at_elems*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_dlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_dgels_Tile(n, n, &descT);

#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)descT->mat, descT->lm*descT->ln*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (double *)malloc((size_t)lda*(size_t)n*sizeof(double));
        if ( !A ) {
            printf("Out of Memory \n ");
            exit(1);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    /* Timed region */
    t = -cWtime();
    PLASMA_dgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        b  = (double *)malloc((size_t)ldb*(size_t)nrhs*sizeof(double));
        /* descB is n x nrhs in nb x nb tiles, so reserve every tile column */
        bT = (double *)malloc((size_t)nt*(size_t)ntrhs*(size_t)nb2*sizeof(double));
        x  = (double *)malloc((size_t)ldb*(size_t)nrhs*sizeof(double));
        if ( (!b) || (!bT) || (!x) ) {
            printf("Out of Memory \n ");
            exit(1);
        }

        LAPACKE_dlarnv_work(1, ISEED, nt*ntrhs*nb2, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        /* Keep the right-hand side in lapack layout before it is overwritten */
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        PLASMA_dgeqrs_Tile( descA, descT, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
    }

    /* Release workspace and descriptors */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PLASMA_Desc_Destroy(&descA);

    free( AT );
    PLASMA_Finalize();

    return 0;
}
/*
 * Time PLASMA_spotrf_Tile on an n x n matrix generated by PLASMA_splgsy_Tile
 * and held in nb x nb tiles.
 *
 * iparam supplies n (TIMING_N), nrhs (TIMING_NRHS), the thread count, the
 * scheduler choice, the tile size and the TIMING_CHECK flag.  The elapsed
 * time is stored in *t_.  When checking is enabled a random right-hand side
 * is solved with PLASMA_spotrs_Tile and s_check_solution() fills the residual
 * and norms in dparam.
 *
 * Returns 0.  The process exits with status 1 if memory cannot be allocated.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *A = NULL, *AT, *b = NULL, *bT = NULL, *x = NULL;
    real_Double_t t;
    PLASMA_desc *descA, *descB;
    int nb, nb2, nt, ntrhs;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    /* Number of tile rows/columns needed to cover n, rounding up */
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    /* Number of tile columns needed to cover nrhs (at least one) */
    ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
    if ( ntrhs < 1 )
        ntrhs = 1;

    /* Allocate Data; the size is computed in size_t so large n does not overflow int */
    AT = (float *)malloc((size_t)nt*(size_t)nt*(size_t)nb2*sizeof(float));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(1);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_splgsy_Tile((float)n, descA, 51 );

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (float *)malloc((size_t)lda*(size_t)n*sizeof(float));
        if ( !A ) {
            printf("Out of Memory \n ");
            exit(1);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    /* Timed region: Cholesky factorization only */
    t = -cWtime();
    PLASMA_spotrf_Tile(PlasmaUpper, descA);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        b  = (float *)malloc((size_t)ldb*(size_t)nrhs*sizeof(float));
        /* descB is n x nrhs in nb x nb tiles, so reserve every tile column */
        bT = (float *)malloc((size_t)nt*(size_t)ntrhs*(size_t)nb2*sizeof(float));
        x  = (float *)malloc((size_t)ldb*(size_t)nrhs*sizeof(float));
        if ( (!b) || (!bT) || (!x) ) {
            printf("Out of Memory \n ");
            exit(1);
        }

        LAPACKE_slarnv_work(1, ISEED, nt*ntrhs*nb2, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealFloat, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        /* Keep the right-hand side in lapack layout before it is overwritten */
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        PLASMA_spotrs_Tile( PlasmaUpper, descA, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = s_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
    }

    PLASMA_Desc_Destroy(&descA);

    free(AT);
    PLASMA_Finalize();

    return 0;
}
int main () { int cores = 2; int M = 15; int N = 10; int LDA = 15; int NRHS = 5; int LDB = 15; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; double *A1 = (double *)malloc(LDA*N*sizeof(double)); double *A2 = (double *)malloc(LDA*N*sizeof(double)); double *B1 = (double *)malloc(LDB*NRHS*sizeof(double)); double *B2 = (double *)malloc(LDB*NRHS*sizeof(double)); double *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_dgeqrf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Initialize B1 and B2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDBxNRHS, B1); for (i = 0; i < M; i++) for (j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_dgeqrf(M, N, A2, LDA, T); /* Solve the problem */ info = PLASMA_dgeqrs(M, N, NRHS, A2, LDA, T, B2, LDB); /* Check the solution */ info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in DGEQRS example ! \n"); else printf("-- Run of DGEQRS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(T); PLASMA_Finalize(); exit(0); }
/*
 * This function returns the solution of Ax=b
 *
 * The function assumes that A is symmetric & positive definite and employs the
 * Cholesky decomposition implemented by PLASMA for homogeneous multicore processors.
 *
 * A is mxm, b is mx1
 *
 * The function returns 0 if the matrix is reported as not positive definite
 * and 1 if successful. A failed memory allocation or an illegal-argument
 * report from the solver terminates the process with exit(1).
 *
 * This function is often called repetitively to solve problems of identical
 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
 * retained between calls and free'd-malloc'ed when not of the appropriate size.
 * A call with NULL as the first argument forces this memory to be released;
 * it also finalizes PLASMA if a previous call initialized it.
 *
 * PLASMA_ncores is negative while PLASMA is not initialized and is flipped to
 * positive once PLASMA_Init() has been called.
 */
int AX_EQ_B_PLASMA_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
{
__STATIC__ LM_REAL *buf=NULL;
__STATIC__ int buf_sz=0;

LM_REAL *a;
int a_sz, tot_sz;
int info, nrhs=1;

    if(A==NULL){
#ifdef LINSOLVERS_RETAIN_MEMORY
      free(buf);
      buf=NULL;
      buf_sz=0;
#endif /* LINSOLVERS_RETAIN_MEMORY */

      /* Finalize only if this module initialized PLASMA; otherwise flipping
       * the sign would make the next solve skip PLASMA_Init()
       */
      if(PLASMA_ncores>0){
        PLASMA_Finalize();
        PLASMA_ncores=-PLASMA_ncores;
      }

      return 1;
    }

    /* calculate required memory size */
    a_sz=m*m;
    tot_sz=a_sz;

#ifdef LINSOLVERS_RETAIN_MEMORY
    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
      free(buf); /* free previously allocated memory */

      buf_sz=tot_sz;
      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
      if(!buf){
        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n");
        exit(1);
      }
    }
#else
    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n");
      exit(1);
    }
#endif /* LINSOLVERS_RETAIN_MEMORY */

    a=buf;

    /* store A into a and B into x; A is assumed to be symmetric,
     * hence no transposition is needed
     */
    memcpy(a, A, a_sz*sizeof(LM_REAL));
    memcpy(x, B, m*sizeof(LM_REAL));

    /* initialize PLASMA on first use */
    if(PLASMA_ncores<0){
      PLASMA_ncores=-PLASMA_ncores;
      PLASMA_Init(PLASMA_ncores);
      fprintf(stderr, RCAT("\n", AX_EQ_B_PLASMA_CHOL) "(): PLASMA is running on %d cores.\n\n", PLASMA_ncores);
    }

    /* Solve the linear system */
    info=PLASMA_POSV(PlasmaLower, m, nrhs, a, m, x, m);
    /* error treatment */
    if(info!=0){
      if(info<0){
        fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", PLASMA_POSV) " in ",
                        AX_EQ_B_PLASMA_CHOL) "()\n", -info);
        exit(1);
      }
      else{
        /* report this function's own name, not that of the non-PLASMA solver */
        fprintf(stderr, RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\n"
                                  "the factorization could not be completed for ", PLASMA_POSV) " in ",
                        AX_EQ_B_PLASMA_CHOL) "()\n", info);
#ifndef LINSOLVERS_RETAIN_MEMORY
        free(buf);
#endif
        return 0;
      }
    }

#ifndef LINSOLVERS_RETAIN_MEMORY
    free(buf);
#endif

    return 1;
}
/*
 * Time PLASMA_Lapack_to_Tile on a random complex matrix whose order is n
 * rounded up to a multiple of the tile size nb.
 *
 * iparam supplies n (TIMING_N), nb (TIMING_NB), the thread count, the
 * scheduler choice and the TIMING_CHECK flag.  The elapsed time is stored in
 * *t_ and the rounded order in dparam[TIMING_ANORM].  When checking is
 * enabled, c_check_conversion() fills dparam[TIMING_RES].
 *
 * Returns 0.  The process exits with status 1 if memory cannot be allocated.
 */
static int RunTest(int *iparam, _PREC *dparam, real_Double_t *t_)
{
    PLASMA_Complex32_t *A = NULL, *AT;
    PLASMA_desc        *descA;
    real_Double_t       t;
    int n     = iparam[TIMING_N];
    int nb    = iparam[TIMING_NB];
    int check = iparam[TIMING_CHECK];
    size_t nbytes;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    /* Round n up to the next multiple of nb */
    n = ((n % nb) == 0) ? (n / nb) * nb : ((n / nb) + 1) * nb ;
    dparam[TIMING_ANORM] = (_PREC)n;

    /* Byte count computed in size_t so large n does not overflow int */
    nbytes = (size_t)n * (size_t)n * sizeof(PLASMA_Complex32_t);

    /* Allocate Data */
    AT = (PLASMA_Complex32_t *)malloc(nbytes);
    /* The lapack-layout source is the input of the timed conversion, so it
     * is needed whether or not the result is checked afterwards */
    A  = (PLASMA_Complex32_t *)malloc(nbytes);

    /* Check if unable to allocate memory */
    if ( (!AT) || (!A) ) {
        printf("Out of Memory \n ");
        exit(1);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_clarnv_work(1, ISEED, n*n, AT);

    /* Copy the random data into the lapack-layout source buffer */
    LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), n, n, AT, n, A, n);

    /* Timed region */
    t = -cWtime();
    PLASMA_Lapack_to_Tile( (void *)A, n, descA);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        dparam[TIMING_RES] = (_PREC)c_check_conversion(n, n, n, 1, nb, nb, A, AT, map_CM, map_CCRB);
    }

    free( A );

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    PLASMA_Finalize();

    return 0;
}
/*
 * Time PLASMA_dsgesv on a random n x n system with nrhs right-hand sides.
 *
 * iparam supplies n (TIMING_N), nrhs (TIMING_NRHS), the thread count, the
 * scheduler choice, tile sizes and the TIMING_CHECK flag.  The elapsed time
 * is stored in *t_.  When checking is enabled, d_check_solution() fills the
 * residual and norms in dparam.
 *
 * Returns 0 on success and -1 if a buffer could not be allocated; every
 * buffer obtained so far is released before returning.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A, *b, *x;
    double *Acpy = NULL;
    double *bcpy = NULL;
    real_Double_t t;
    int *piv;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    int iter = 0;
    /* Byte counts are computed in size_t so large n does not overflow int */
    size_t a_bytes = (size_t)lda * (size_t)n    * sizeof(double);
    size_t b_bytes = (size_t)ldb * (size_t)nrhs * sizeof(double);

    /* Allocate Data */
    A   = (double *)malloc(a_bytes);
    b   = (double *)malloc(b_bytes);
    x   = (double *)malloc(b_bytes);
    piv = (int *)malloc( (size_t)n*sizeof(int));

    /* Check if unable to allocate memory */
    if ( (!A) || (!b) || (!x) || (!piv) ) {
        printf("Out of Memory \n ");
        free( piv ); free( x ); free( b ); free( A );
        return -1;
    }

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Initialize Data */
    LAPACKE_dlarnv_work(1, ISEED, lda*n,    A);
    LAPACKE_dlarnv_work(1, ISEED, ldb*nrhs, b);

    /* Save A and b */
    if (check) {
        Acpy = (double *)malloc(a_bytes);
        bcpy = (double *)malloc(b_bytes);
        if ( (!Acpy) || (!bcpy) ) {
            printf("Out of Memory \n ");
            free( Acpy ); free( bcpy );
            free( piv ); free( x ); free( b ); free( A );
            PLASMA_Finalize();
            return -1;
        }
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, n,    A, lda, Acpy, lda);
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, b, ldb, bcpy, ldb);
    }

    /* Timed region */
    t = -cWtime();
    PLASMA_dsgesv( n, nrhs, A, lda, piv, b, ldb, x, ldb, &iter );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, Acpy, lda, bcpy, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        free(Acpy); free(bcpy);
    }

    free( piv );
    free( x );
    free( b );
    free( A );

    PLASMA_Finalize();

    return 0;
}
/*
 * Time PLASMA_zgeqrf_Tile on a random M x N complex matrix using the tree
 * Householder mode with a Householder size of 4.
 *
 * iparam supplies the sizes, the thread count, the scheduler choice and the
 * tile sizes.  The elapsed time is stored in *t_; dparam is not used.
 *
 * NOTE(review): M is read from iparam[TIMING_N] and N from iparam[TIMING_M];
 * confirm that this swap is intended.
 *
 * Returns 0.  The process exits with status 1 if memory cannot be allocated.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *AT;
    PLASMA_desc *descA, *descT;
    real_Double_t t;
    int nb;
    int M = iparam[TIMING_N];
    int N = iparam[TIMING_M];
    int lda = M;

    (void)dparam;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb = iparam[TIMING_NB];

    /* Householder mode */
    PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_TREE_HOUSEHOLDER);
    PLASMA_Set(PLASMA_HOUSEHOLDER_SIZE, 4);

    /* Allocate Data; the size is computed in size_t so large M*N does not overflow int */
    AT = (PLASMA_Complex64_t *)malloc((size_t)lda*(size_t)N*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        PLASMA_Finalize();
        exit(1);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, M, N, 0, 0, M, N);
    LAPACKE_zlarnv_work(1, ISEED, lda*N, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_zgels_Tile(M, N, &descT);

    /* Timed region */
    t = -cWtime();
    PLASMA_zgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;

    /* Release workspace and descriptor */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PLASMA_Desc_Destroy(&descA);

    free( AT );
    PLASMA_Finalize();

    return 0;
}
int main () { int cores = 2; int N = 10; int LDA = 10; int NRHS = 5; int LDB = 10; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A1)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A2)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B1)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B2)); PLASMA_desc *L; int *IPIV; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); return EXIT_SUCCESS; } /*Plasma Initialize*/ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 Matrix */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* Allocate L and IPIV */ info = PLASMA_Alloc_Workspace_zgetrf_incpiv(N, N, &L, &IPIV); /* LU factorization of the matrix A */ info = PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV); /* Solve the problem */ info = PLASMA_ztrsmpl(N, NRHS, A2, LDA, L, IPIV, B2, LDB); info = PLASMA_ztrsm(PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex64_t)1.0, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in ZGETRS example ! \n"); else printf("-- Run of ZGETRS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L); PLASMA_Finalize(); return EXIT_SUCCESS; }
/*
 * Time PLASMA_cgetrf_incpiv on a random n x n complex matrix.
 *
 * iparam supplies n (TIMING_N), nrhs (TIMING_NRHS), the thread count, the
 * scheduler choice, tile sizes and the TIMING_CHECK flag.  The elapsed time
 * is stored in *t_.  When checking is enabled a random right-hand side is
 * solved with PLASMA_cgetrs_incpiv and c_check_solution() fills the residual
 * and norms in dparam.
 *
 * Returns 0.  The process exits with status 1 if memory cannot be allocated.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    PLASMA_Complex32_t *A, *Acpy = NULL, *L, *b = NULL, *x = NULL;
    real_Double_t t;
    int *piv;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    /* Byte counts are computed in size_t so large n does not overflow int */
    size_t a_bytes = (size_t)lda * (size_t)n    * sizeof(PLASMA_Complex32_t);
    size_t b_bytes = (size_t)ldb * (size_t)nrhs * sizeof(PLASMA_Complex32_t);

    /* Allocate Data */
    A = (PLASMA_Complex32_t *)malloc(a_bytes);

    /* Check if unable to allocate memory */
    if ( !A ){
        printf("Out of Memory \n ");
        exit(1);
    }

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Initialize Data */
    LAPACKE_clarnv_work(1, ISEED, n*lda, A);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_cgesv_incpiv(n, &L, &piv);

    /* Save A in lapack layout for check */
    if ( check ) {
        Acpy = (PLASMA_Complex32_t *)malloc(a_bytes);
        if ( !Acpy ) {
            printf("Out of Memory \n ");
            PLASMA_Finalize();
            exit(1);
        }
        LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', n, n, A, lda, Acpy, lda);
    }

    /* Timed region */
    t = -cWtime();
    PLASMA_cgetrf_incpiv( n, n, A, lda, L, piv );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        b = (PLASMA_Complex32_t *)malloc(b_bytes);
        x = (PLASMA_Complex32_t *)malloc(b_bytes);
        if ( (!b) || (!x) ) {
            printf("Out of Memory \n ");
            PLASMA_Finalize();
            exit(1);
        }

        /* x starts as the random right-hand side; b keeps an untouched copy */
        LAPACKE_clarnv_work(1, ISEED, ldb*nrhs, x);
        LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, x, ldb, b, ldb);

        PLASMA_cgetrs_incpiv( PlasmaNoTrans, n, nrhs, A, lda, L, piv, x, ldb );

        dparam[TIMING_RES] = c_check_solution(n, n, nrhs, Acpy, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        free( Acpy );
        free( b );
        free( x );
    }

    free( A );
    free( L );
    free( piv );

    PLASMA_Finalize();

    return 0;
}
int main () { int cores = 2; int M = 10; int N = 15; int LDA = 10; int K = min(M, N); int info; int info_ortho, info_factorization; int i,j; int LDAxN = LDA*N; double *A1 = (double *)malloc(LDA*N*sizeof(double)); double *A2 = (double *)malloc(LDA*N*sizeof(double)); double *Q = (double *)malloc(LDA*N*sizeof(double)); double *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!Q)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_dgelqf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_dgelqf(M, N, A2, LDA, T); /* Building the economy-size Q */ memset((void*)Q, 0, LDA*N*sizeof(double)); for (i = 0; i < K; i++) Q[LDA*i+i] = 1.0; PLASMA_dorglq(M, N, K, A2, LDA, T, Q, LDA); /* Check the orthogonality, factorization and the solution */ info_ortho = check_orthogonality(M, N, LDA, Q); info_factorization = check_factorization(M, N, A1, A2, LDA, Q); if ((info_ortho != 0)|(info_factorization != 0)|(info != 0)) printf("-- Error in DGELQF example ! \n"); else printf("-- Run of DGELQF example successful ! \n"); free(A1); free(A2); free(Q); free(T); PLASMA_Finalize(); exit(0); }
int main (int argc, char **argv) { int ncores, sched; int info; char func[32]; /* Check for number of arguments*/ if ( argc < 4) { printf(" Proper Usage is : ./stesting ncores sched FUNC ...\n" " - ncores : number of cores \n" " - sched : 0 for static, 1 for dynamic\n" " - FUNC : name of function to test\n"); exit(1); } sscanf( argv[1], "%d", &ncores ); sscanf( argv[2], "%d", &sched ); sscanf( argv[3], "%s", func ); PLASMA_Init(ncores); if ( sched == 0 ) PLASMA_Set( PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); else PLASMA_Set( PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); argc -= 4; argv += 4; info = 0; /* * Norms */ if ( strcmp(func, "LANGE") == 0 ) { info = testing_slange( argc, argv ); /* * Blas Level 3 */ } else if ( strcmp(func, "GEMM") == 0 ) { info = testing_sgemm( argc, argv ); #ifdef COMPLEX } else if ( strcmp(func, "HEMM") == 0 ) { info = testing_ssymm( argc, argv ); } else if ( strcmp(func, "HERK") == 0 ) { info = testing_ssyrk( argc, argv ); } else if ( strcmp(func, "HER2K") == 0 ) { info = testing_ssyr2k( argc, argv ); #endif } else if ( strcmp(func, "SYMM") == 0 ) { info = testing_ssymm( argc, argv ); } else if ( strcmp(func, "SYRK") == 0 ) { info = testing_ssyrk( argc, argv ); } else if ( strcmp(func, "SYR2K") == 0 ) { info = testing_ssyr2k( argc, argv ); } else if ( strcmp(func, "TRMM") == 0 ) { info = testing_strmm( argc, argv ); } else if ( strcmp(func, "TRSM") == 0 ) { info = testing_strsm( argc, argv ); /* * Linear system */ } else if ( strcmp(func, "POSV") == 0 ) { info = testing_sposv( argc, argv ); } else if ( strcmp(func, "GELS") == 0 ) { info = testing_sgels( argc, argv ); } else if ( strcmp(func, "GESV") == 0 ) { info = testing_sgesv( argc, argv ); /* * Eigenvalue Problems */ } else if ( strcmp(func, "HEEV") == 0 ) { info = testing_ssyev( argc, argv ); } else if ( strcmp(func, "HEGV") == 0 ) { info = testing_ssygv( argc, argv ); } else if ( strcmp(func, "HEGST") == 0 ) { info = testing_ssygst( argc, argv ); /* * Singular 
Value Decomposition */ } else if ( strcmp(func, "GESVD") == 0 ) { info = testing_sgesvd( argc, argv ); #ifdef DOUBLE /* * Mixed precision */ } else if ( strcmp(func, "SPOSV") == 0 ) { info = testing_dsposv( argc, argv ); } else if ( strcmp(func, "SGESV") == 0 ) { info = testing_dsgesv( argc, argv ); } else if ( strcmp(func, "SUNGESV") == 0 ) { info = testing_dsungesv( argc, argv ); #endif /* Layout Transformation */ } else if ( strcmp(func, "GECFI") == 0 ) { info = testing_sgecfi( argc, argv ); } else if ( strcmp(func, "GETMI") == 0 ) { info = testing_sgetmi( argc, argv ); } else { fprintf(stderr, "Function unknown\n"); } if ( info == -1 ) { printf( "TESTING %s FAILED : incorrect number of arguments\n", func); } else if ( info == -2 ) { printf( "TESTING %s FAILED : not enough memory\n", func); } PLASMA_Finalize(); return EXIT_SUCCESS; }
/*
 * Timing driver for PLASMA_ssyev_Tile on an n-by-n matrix filled by
 * PLASMA_splgsy_Tile.
 *
 * iparam : run settings, read at TIMING_N, TIMING_CHECK, TIMING_THRDNBR,
 *          TIMING_NB and TIMING_IB.  Dynamic scheduling is always used.
 * dparam : not written; no result check is implemented here.
 * t_     : receives the cWtime() difference measured around the
 *          PLASMA_ssyev_Tile call.
 *
 * Returns 0 on success and -2 when a buffer cannot be allocated.  On the
 * failure path every buffer already obtained is released and PLASMA is
 * finalized, so the runtime is not left initialized.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *AT = NULL, *Q = NULL;
    float *W  = NULL;
    PLASMA_desc *descA = NULL;
    PLASMA_desc *descQ = NULL;
    PLASMA_desc *descT = NULL;
    real_Double_t t;
    int nb;
    int rc    = -2;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    int lda   = n;
    int uplo  = PlasmaUpper;
    int vec   = PlasmaNoVec;   /* fixed here, so the Q branches below are inactive */

    (void)dparam;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb = iparam[TIMING_NB];

    /* Allocate Data */
    AT = (float *)malloc(lda*n*sizeof(float));
    W  = (float *)malloc(n*sizeof(float));
    if (vec == PlasmaVec) {
        Q = (float *)malloc(lda*n*sizeof(float));
        if ( !Q ) {
            printf("Out of Memory -Q-\n ");
            goto cleanup;
        }
    }

    /* Check if unable to allocate memory */
    if ( (!AT) || (!W) ) {
        printf("Out of Memory -\n ");
        goto cleanup;
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_splgsy_Tile((float)0.0, descA, 51 );

    if (vec == PlasmaVec)
        PLASMA_Desc_Create(&descQ, Q, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);

    /* Placeholder: nothing is saved for a later check. */
    if ( check ) {
    }

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_ssyev(n, n, &descT);

    t = -cWtime();
    PLASMA_ssyev_Tile( vec, uplo, descA, W, descT, descQ );
    t += cWtime();
    *t_ = t;

    /* Placeholder: no solution check is implemented. */
    if ( check ) {
    }

    /* DeAllocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);
    if (vec == PlasmaVec)
        PLASMA_Desc_Destroy(&descQ);

    rc = 0;

cleanup:
    /* free(NULL) is a no-op, so this is safe on every path. */
    free( Q );
    free( AT );
    free( W );
    PLASMA_Finalize();

    return rc;
}
/*
 * Timing driver for PLASMA_sgemm_Tile (C = alpha*A*B + beta*C, no transpose)
 * on n-by-n operands stored in tile layout.
 *
 * iparam : run settings, read at TIMING_N, TIMING_CHECK, TIMING_THRDNBR,
 *          TIMING_SCHEDULER and TIMING_NB.
 * dparam : when iparam[TIMING_CHECK] is non-zero, dparam[TIMING_RES] receives
 *          the value returned by s_check_gemm; the TIMING_ANORM, TIMING_BNORM
 *          and TIMING_XNORM slots are handed to that routine by address.
 * t_     : receives the cWtime() difference measured around the
 *          PLASMA_sgemm_Tile call.
 *
 * Returns 0.  The process exits with status 0 if the tile buffers cannot be
 * allocated.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *AT, *BT, *CT;
    float *A = NULL, *B = NULL, *C1 = NULL, *C2 = NULL;
    float alpha, beta;
    PLASMA_desc *descA, *descB, *descC;
    real_Double_t t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    int lda   = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is disabled; the tile size comes from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);   /* tiles per dimension, rounded up */

    /* Allocate Data: nt*nt full tiles per operand */
    AT = (float *)malloc(nt*nt*nb2*sizeof(float));
    BT = (float *)malloc(nt*nt*nb2*sizeof(float));
    CT = (float *)malloc(nt*nt*nb2*sizeof(float));

    /* Check if unable to allocate memory */
    if ( (!AT) || (!BT) || (!CT) ) {
        printf("Out of Memory \n ");
        exit(0);
    }

#if defined(PLASMA_CUDA)
    cudaHostRegister(AT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
    cudaHostRegister(BT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
    cudaHostRegister(CT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
#endif

    /* Initialize Data: random scalars and random tile buffers */
    LAPACKE_slarnv_work(1, ISEED, 1, &alpha);
    LAPACKE_slarnv_work(1, ISEED, 1, &beta);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, AT);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, BT);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, CT);

    /* Describe the three tile buffers as n-by-n matrices */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_Desc_Create(&descB, BT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_Desc_Create(&descC, CT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);

    /* Keep the initial C in LAPACK layout for the check */
    if (check) {
        C2 = (float *)malloc(n*lda*sizeof(float));
        PLASMA_Tile_to_Lapack(descC, (void*)C2, n);
    }

#if defined(PLASMA_CUDA)
    core_cublas_init();
#endif

    t = -cWtime();
    PLASMA_sgemm_Tile( PlasmaNoTrans, PlasmaNoTrans, alpha, descA, descB, beta, descC );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        /* Each tile buffer is released as soon as it has been converted. */
        A = (float *)malloc(n*lda*sizeof(float));
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
        free(AT);

        B = (float *)malloc(n*lda*sizeof(float));
        PLASMA_Tile_to_Lapack(descB, (void*)B, n);
        free(BT);

        C1 = (float *)malloc(n*lda*sizeof(float));
        PLASMA_Tile_to_Lapack(descC, (void*)C1, n);
        free(CT);

        dparam[TIMING_RES] = s_check_gemm( PlasmaNoTrans, PlasmaNoTrans, n, n, n,
                                           alpha, A, lda, B, lda, beta, C1, C2, lda,
                                           &(dparam[TIMING_ANORM]),
                                           &(dparam[TIMING_BNORM]),
                                           &(dparam[TIMING_XNORM]));

        /* Release all four LAPACK-layout copies (A, B and C1 used to leak). */
        free(A);
        free(B);
        free(C1);
        free(C2);
    }
    else {
        free( AT );
        free( BT );
        free( CT );
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descC);
    PLASMA_Finalize();

    return 0;
}
/*
 * Timing driver for PLASMA_sgetrf on an m-by-n matrix filled by
 * PLASMA_splrnt.
 *
 * iparam : run settings, read at TIMING_M, TIMING_N, TIMING_NRHS,
 *          TIMING_CHECK, TIMING_THRDNBR, TIMING_SCHEDULER, TIMING_NB and
 *          TIMING_IB.
 * dparam : when checking is requested and m == n, dparam[TIMING_RES] receives
 *          the value returned by s_check_solution; the TIMING_ANORM,
 *          TIMING_BNORM and TIMING_XNORM slots are handed to that routine by
 *          address.
 * t_     : receives the cWtime() difference measured around PLASMA_sgetrf.
 *
 * Returns 0 on success, or -1 if the matrix or the pivot array cannot be
 * allocated (whatever was allocated is released first).
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *A, *Acpy = NULL, *b, *x;
    real_Double_t t;
    int *piv;
    int m     = iparam[TIMING_M];
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = m;
    int ldb   = m;

    /* Allocate Data */
    A   = (float *)malloc(lda*n*sizeof(float));
    piv = (int *)malloc( min(m, n) * sizeof(int));

    /* Check if unable to allocate memory; do not leak the other buffer */
    if ( !A || !piv ){
        printf("Out of Memory \n ");
        free( A );
        free( piv );
        return -1;
    }

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is disabled; block sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Initialize Data */
    PLASMA_splrnt(m, n, A, lda, 3456);

    /* Save A in lapack layout for check (square problems only) */
    if ( check && (m == n) ) {
        Acpy = (float *)malloc(lda*n*sizeof(float));
        LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', m, n, A, lda, Acpy, lda);
    }

    t = -cWtime();
    PLASMA_sgetrf( m, n, A, lda, piv );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check && (m == n) ) {
        b = (float *)malloc(ldb*nrhs *sizeof(float));
        x = (float *)malloc(ldb*nrhs *sizeof(float));

        /* x starts as the random right-hand side; b keeps a copy of it */
        LAPACKE_slarnv_work(1, ISEED, ldb*nrhs, x);
        LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', n, nrhs, x, ldb, b, ldb);

        PLASMA_sgetrs( PlasmaNoTrans, n, nrhs, A, lda, piv, x, ldb );

        dparam[TIMING_RES] = s_check_solution(m, n, nrhs, Acpy, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));

        free( Acpy );
        free( b );
        free( x );
    }

    free( A );
    free( piv );
    PLASMA_Finalize();

    return 0;
}
/*
 * Timing driver for PLASMA_zgetrf_incpiv_Tile on an n-by-n double-complex
 * matrix stored in tile layout.
 *
 * iparam : run settings, read at TIMING_N, TIMING_NRHS, TIMING_CHECK,
 *          TIMING_THRDNBR, TIMING_SCHEDULER, TIMING_NB and TIMING_IB.
 * dparam : when iparam[TIMING_CHECK] is non-zero, dparam[TIMING_RES] receives
 *          the value returned by z_check_solution; the TIMING_ANORM,
 *          TIMING_BNORM and TIMING_XNORM slots are handed to that routine by
 *          address.
 * t_     : receives the cWtime() difference measured around the
 *          factorization call.
 *
 * Returns 0.  The process exits with status 0 if the tile buffer cannot be
 * allocated.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *A = NULL, *AT, *b, *bT, *x;
    PLASMA_desc *descA, *descB, *descL;
    real_Double_t t;
    int *piv;
    int nb, nb2, nt, ntrhs;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = n;
    int ldb   = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is disabled; block sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);   /* tiles per dimension, rounded up */

    /* Allocate Data */
    AT = (PLASMA_Complex64_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_zlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_zgesv_incpiv_Tile(n, &descL, &piv);

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (PLASMA_Complex64_t *)malloc(lda*n *sizeof(PLASMA_Complex64_t));
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_zgetrf_incpiv_Tile( descA, descL, piv );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        /*
         * The right-hand side is described as an n-by-nrhs tile matrix, so
         * its buffer must hold nt tile rows times ceil(nrhs/nb) tile
         * columns.  Sizing it for a single tile column overruns the buffer
         * as soon as nrhs > nb.  At least one tile column is kept so the
         * size is unchanged for nrhs <= nb.
         */
        ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
        if ( ntrhs < 1 )
            ntrhs = 1;

        b  = (PLASMA_Complex64_t *)malloc(ldb*nrhs *sizeof(PLASMA_Complex64_t));
        bT = (PLASMA_Complex64_t *)malloc((size_t)nt*ntrhs*nb2*sizeof(PLASMA_Complex64_t));
        x  = (PLASMA_Complex64_t *)malloc(ldb*nrhs *sizeof(PLASMA_Complex64_t));

        LAPACKE_zlarnv_work(1, ISEED, n*nrhs, b);
        PLASMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Lapack_to_Tile((void*)b, n, descB);

        PLASMA_zgetrs_incpiv_Tile( descA, descL, piv, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);
        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
    }

    /* Deallocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descL);

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    free( piv );
    PLASMA_Finalize();

    return 0;
}
/*
 * Timing driver for PLASMA_ztrsm (left side, upper, no transpose, unit
 * diagonal) with an n-by-n matrix and n right-hand sides.
 *
 * iparam : run settings, read at TIMING_N, TIMING_CHECK, TIMING_THRDNBR,
 *          TIMING_SCHEDULER and TIMING_NB.
 * dparam : when iparam[TIMING_CHECK] is non-zero, dparam[TIMING_RES] receives
 *          the value returned by z_check_trsm; the TIMING_ANORM, TIMING_BNORM
 *          and TIMING_XNORM slots are handed to that routine by address.
 * t_     : receives the cWtime() difference measured around PLASMA_ztrsm.
 *
 * Returns 0.  The process exits with status 0 if either matrix cannot be
 * allocated.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *tri, *rhs, *rhs_saved = NULL;
    PLASMA_Complex64_t alpha;
    real_Double_t elapsed;
    int d, ndiag;
    int n     = iparam[TIMING_N];
    int nrhs  = n;
    int check = iparam[TIMING_CHECK];
    int lda   = n;

    /* Both matrices are allocated before the runtime is started. */
    tri = (PLASMA_Complex64_t *)malloc(lda*n   *sizeof(PLASMA_Complex64_t));
    rhs = (PLASMA_Complex64_t *)malloc(lda*nrhs*sizeof(PLASMA_Complex64_t));
    if ( tri == NULL || rhs == NULL ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Start PLASMA and select the scheduler requested by the caller. */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set( PLASMA_SCHEDULING_MODE,
                iparam[TIMING_SCHEDULER] ? PLASMA_DYNAMIC_SCHEDULING
                                         : PLASMA_STATIC_SCHEDULING );

    /* Autotuning is switched off; the tile size comes from iparam. */
    PLASMA_Disable( PLASMA_AUTOTUNING );
    PLASMA_Set( PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    /* Random matrix, right-hand sides and scalar. */
    lapack_zlarnv(1, ISEED, n   *lda, tri);
    lapack_zlarnv(1, ISEED, nrhs*lda, rhs);
    lapack_zlarnv(1, ISEED, 1,        &alpha);

    /* Shift every diagonal entry by 2. */
    ndiag = max(n, nrhs);
    for (d = 0; d < ndiag; d++) {
        tri[lda*d+d] += 2.0;
    }

    /* Keep the original right-hand sides for the check. */
    if ( check ) {
        rhs_saved = (PLASMA_Complex64_t *)malloc(lda*nrhs*sizeof(PLASMA_Complex64_t));
        memcpy(rhs_saved, rhs, lda*nrhs*sizeof(PLASMA_Complex64_t));
    }

    /* Timed region: the triangular solve only. */
    elapsed  = -cWtime();
    PLASMA_ztrsm( PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaUnit,
                  n, nrhs, alpha, tri, lda, rhs, lda );
    elapsed +=  cWtime();
    *t_ = elapsed;

    if ( check ) {
        dparam[TIMING_RES] = z_check_trsm( PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaUnit,
                                           n, nrhs, alpha, tri, lda, rhs, rhs_saved, lda,
                                           &(dparam[TIMING_ANORM]),
                                           &(dparam[TIMING_BNORM]),
                                           &(dparam[TIMING_XNORM]));
        free(rhs_saved);
    }

    free( tri );
    free( rhs );
    PLASMA_Finalize();

    return 0;
}
int main () { int cores = 2; int N = 10; int LDA = 10; int NRHS = 5; int LDB = 10; int info; int info_solution; int i,j; int NminusOne = N-1; int LDBxNRHS = LDB*NRHS; float *A1 = (float *)malloc(LDA*N*sizeof(float)); float *A2 = (float *)malloc(LDA*N*sizeof(float)); float *B1 = (float *)malloc(LDB*NRHS*sizeof(float)); float *B2 = (float *)malloc(LDB*NRHS*sizeof(float)); float *WORK = (float *)malloc(2*LDA*sizeof(float)); float *D = (float *)malloc(LDA*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialize */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /*------------------------------------------------------------- * TESTING SPOSV */ /* Initialize A1 and A2 for Symmetric Positif Matrix */ LAPACKE_slarnv_work(IONE, ISEED, LDA, D); slagsy(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++){ A1[LDA*i+i] = A1[LDA*i+i] + (float)N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Initialize B1 and B2 */ LAPACKE_slarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA SPOSV */ info = PLASMA_sposv(PlasmaUpper, N, NRHS, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in SPOSV example ! \n"); else printf("-- Run of SPOSV example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(WORK); free(D); PLASMA_Finalize(); exit(0); }
/*
 * C-linkage wrapper around PLASMA_Init.
 *
 * num_cores is forwarded unchanged; whatever PLASMA_Init returns is
 * discarded, so callers get no status from this function.
 */
extern "C" void plasma_init(int num_cores)
{
    (void) PLASMA_Init(num_cores);
}
/*
 * Timing driver for PLASMA_sgesv_Tile on a random n-by-n system with nrhs
 * right-hand sides.
 *
 * iparam : run settings, read at TIMING_N, TIMING_NB, TIMING_NRHS,
 *          TIMING_CHECK, TIMING_THRDNBR, TIMING_SCHEDULER and TIMING_IB.
 * dparam : when iparam[TIMING_CHECK] is non-zero, dparam[TIMING_RES] receives
 *          the value returned by s_check_solution; the TIMING_ANORM,
 *          TIMING_BNORM and TIMING_XNORM slots are handed to that routine by
 *          address.
 * t_     : receives the cWtime() difference measured around
 *          PLASMA_sgesv_Tile.
 *
 * Returns 0 on success, or -1 if a buffer cannot be allocated.  On that path
 * the buffers already obtained are released and PLASMA is finalized, so the
 * runtime is not left initialized.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *AT, *bT, *x;
    float *A = NULL;
    float *b = NULL;
    PLASMA_desc *descA, *descB;
    real_Double_t t;
    int *piv;
    int n     = iparam[TIMING_N];
    int nb    = iparam[TIMING_NB];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = n;
    int ldb   = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is disabled; block sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate Data */
    AT  = (float *)malloc(lda*n   *sizeof(float));
    bT  = (float *)malloc(ldb*nrhs*sizeof(float));
    piv = (int *)malloc( n*sizeof(int));

    /* Check if unable to allocate memory; undo what was done so far */
    if ( (!AT) || (!bT) || (!piv) ) {
        printf("Out of Memory \n ");
        free( AT );
        free( bT );
        free( piv );
        PLASMA_Finalize();
        return -1;
    }

    /* Describe AT and bT, then fill both buffers with random values */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, lda, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaRealFloat, nb, nb, nb*nb, ldb, nrhs, 0, 0, n, nrhs);
    LAPACKE_slarnv_work(1, ISEED, lda*n,    AT);
    LAPACKE_slarnv_work(1, ISEED, ldb*nrhs, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (float *)malloc(lda*n   *sizeof(float));
        b = (float *)malloc(ldb*nrhs*sizeof(float));
        PLASMA_sTile_to_Lapack(descA, (void*)A, lda);
        PLASMA_sTile_to_Lapack(descB, (void*)b, ldb);
    }

    t = -cWtime();
    PLASMA_sgesv_Tile( descA, piv, descB );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        x = (float *)malloc(ldb*nrhs *sizeof(float));
        PLASMA_sTile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = s_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        free(A); free(b); free(x);
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);

    free( AT );
    free( bT );
    free( piv );
    PLASMA_Finalize();

    return 0;
}
/*
 * Timing driver for PLASMA_cgecfi, converting an n-by-n matrix in place from
 * PlasmaCM to PlasmaCCRB with nb-by-nb blocks.  n is first rounded up to a
 * multiple of nb.
 *
 * iparam : run settings, read at TIMING_N, TIMING_NB, TIMING_CHECK,
 *          TIMING_THRDNBR and TIMING_SCHEDULER.
 * dparam : dparam[TIMING_ANORM] is set to the rounded n and
 *          dparam[TIMING_BNORM] to _FADDS; when iparam[TIMING_CHECK] is
 *          non-zero, dparam[TIMING_RES] receives the value returned by
 *          c_check_conversion.
 * t_     : receives the cWtime() difference measured around PLASMA_cgecfi.
 *
 * Returns 0.  The process exits with status 0 if the matrix cannot be
 * allocated.
 */
static int RunTest(int *iparam, _PREC *dparam, real_Double_t *t_)
{
    PLASMA_Complex32_t *mat;
    PLASMA_Complex32_t *mat_saved = NULL;
    real_Double_t elapsed;
    int n     = iparam[TIMING_N];
    int nb    = iparam[TIMING_NB];
    int check = iparam[TIMING_CHECK];

    /* Round n up to the next multiple of nb (unchanged if already one). */
    if ( (n % nb) != 0 ) {
        n = ((n / nb) + 1) * nb;
    }

    dparam[TIMING_ANORM] = (_PREC)n;
    dparam[TIMING_BNORM] = (_PREC)_FADDS;

    /* The matrix is allocated before the runtime is started. */
    mat = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t));
    if ( mat == NULL ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Start PLASMA and select the scheduler requested by the caller. */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set( PLASMA_SCHEDULING_MODE,
                iparam[TIMING_SCHEDULER] ? PLASMA_DYNAMIC_SCHEDULING
                                         : PLASMA_STATIC_SCHEDULING );

    /* Autotuning is switched off; the tile size comes from iparam. */
    PLASMA_Disable( PLASMA_AUTOTUNING );
    PLASMA_Set( PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    /* Random input. */
    LAPACKE_clarnv_work(1, ISEED, n*n, mat);

    /* Keep the unconverted matrix for the check. */
    if ( check ) {
        mat_saved = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t));
        LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower),
                            n, n, mat, n, mat_saved, n);
    }

    /* Timed region: the layout conversion only. */
    elapsed  = -cWtime();
    PLASMA_cgecfi( n, n, mat, PlasmaCM, n, 1, PlasmaCCRB, nb, nb);
    elapsed +=  cWtime();
    *t_ = elapsed;

    if ( check ) {
        dparam[TIMING_RES] = (_PREC)c_check_conversion(n, n, n, 1, nb, nb,
                                                       mat_saved, mat,
                                                       map_CM, map_CCRB);
        free(mat_saved);
    }

    free( mat );
    PLASMA_Finalize();

    return 0;
}
int main () { int cores = 2; int N = 10; int LDA = 10; int NRHS = 5; int LDB = 10; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*(sizeof*A1)); PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*(sizeof*A2)); PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*(sizeof*B1)); PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*(sizeof*B2)); PLASMA_Complex32_t *L; int *IPIV; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /*Plasma Initialize*/ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 Matrix */ LAPACKE_clarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA CGESV */ info = PLASMA_Alloc_Workspace_cgesv_incpiv(N, &L, &IPIV); info = PLASMA_cgesv_incpiv(N, NRHS, A2, LDA, L, IPIV, B2, LDB); /* Check the factorization and the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in CGESV example ! \n"); else printf("-- Run of CGESV example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L); PLASMA_Finalize(); exit(0); }
/*
 * Timing driver for the DPOTRF / DTRTRI / DLAUUM chain (or DPOTRF followed
 * by DPOTRI) on an n-by-n tile matrix, lower storage.
 *
 * Which calls are timed is chosen at compile time:
 *   TRACE_BY_SEQUENCE + POTRI_SYNC : three async calls on three sequences,
 *                                    waiting after each one
 *   TRACE_BY_SEQUENCE              : three async calls on three sequences,
 *                                    waiting once all are submitted
 *   POTRI_SYNC                     : three synchronous tile calls
 *   (default)                      : dpotrf + dpotri, async, one sequence
 *
 * iparam : run settings, read at TIMING_N, TIMING_CHECK, TIMING_THRDNBR,
 *          TIMING_NB and TIMING_SCHEDULER.
 * dparam : when iparam[TIMING_CHECK] is non-zero, the TIMING_ANORM,
 *          TIMING_XNORM, TIMING_BNORM and TIMING_RES slots are set to 0.0;
 *          no actual check is performed.
 * t_     : receives the cWtime() difference measured around the chain.
 *
 * Returns 0.  The process exits with status 0 if the tile buffer cannot be
 * allocated.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *tiles;
    real_Double_t elapsed;
    PLASMA_desc *descA;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    PLASMA_enum uplo = PlasmaLower;

    /* Start PLASMA; autotuning off, tile size taken from iparam. */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Disable( PLASMA_AUTOTUNING );
    PLASMA_Set( PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb;
    if ( (n % nb) != 0 ) {
        nt++;                      /* partial last tile */
    }

    tiles = (double *)malloc(nt*nt*nb2*sizeof(double));
    if ( tiles == NULL ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /*
     * The matrix is generated under static scheduling, regardless of the
     * scheduler requested for the timed part (per the original note: to
     * keep the generation apart from the sequence being traced).
     */
    PLASMA_Set( PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );
    PLASMA_Desc_Create(&descA, tiles, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_dplgsy_Tile( (double)n, descA, 51 );

    /* Now switch to the scheduler the caller asked for. */
    PLASMA_Set( PLASMA_SCHEDULING_MODE,
                iparam[TIMING_SCHEDULER] ? PLASMA_DYNAMIC_SCHEDULING
                                         : PLASMA_STATIC_SCHEDULING );

    /* PLASMA DPOTRF / DTRTRI / DLAUUM */
    {
#if defined(TRACE_BY_SEQUENCE)
        PLASMA_sequence *seq[3];
        PLASMA_request   req[3] = { PLASMA_REQUEST_INITIALIZER,
                                    PLASMA_REQUEST_INITIALIZER,
                                    PLASMA_REQUEST_INITIALIZER };
        int k;

        for (k = 0; k < 3; k++) {
            PLASMA_Sequence_Create(&seq[k]);
        }

        elapsed = -cWtime();

#if defined(POTRI_SYNC)
        /* One sequence per step, waiting after each submission. */
        PLASMA_dpotrf_Tile_Async(uplo, descA, seq[0], &req[0]);
        PLASMA_Sequence_Wait(seq[0]);

        PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, seq[1], &req[1]);
        PLASMA_Sequence_Wait(seq[1]);

        PLASMA_dlauum_Tile_Async(uplo, descA, seq[2], &req[2]);
        PLASMA_Sequence_Wait(seq[2]);
#else
        /* Submit all three steps first, then wait on each sequence. */
        PLASMA_dpotrf_Tile_Async(uplo, descA, seq[0], &req[0]);
        PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, seq[1], &req[1]);
        PLASMA_dlauum_Tile_Async(uplo, descA, seq[2], &req[2]);

        for (k = 0; k < 3; k++) {
            PLASMA_Sequence_Wait(seq[k]);
        }
#endif

        elapsed += cWtime();

        for (k = 0; k < 3; k++) {
            PLASMA_Sequence_Destroy(seq[k]);
        }

#else

#if defined(POTRI_SYNC)
        /* Plain synchronous tile calls. */
        elapsed = -cWtime();
        PLASMA_dpotrf_Tile(uplo, descA);
        PLASMA_dtrtri_Tile(uplo, PlasmaNonUnit, descA);
        PLASMA_dlauum_Tile(uplo, descA);
        elapsed += cWtime();
#else
        /* Default: asynchronous calls sharing a single sequence; the
         * sequence creation is inside the timed region. */
        PLASMA_sequence *seq;
        PLASMA_request   req[2] = { PLASMA_REQUEST_INITIALIZER,
                                    PLASMA_REQUEST_INITIALIZER };

        elapsed = -cWtime();
        PLASMA_Sequence_Create(&seq);
        PLASMA_dpotrf_Tile_Async(uplo, descA, seq, &req[0]);
        PLASMA_dpotri_Tile_Async(uplo, descA, seq, &req[1]);
        PLASMA_Sequence_Wait(seq);
        elapsed += cWtime();

        PLASMA_Sequence_Destroy(seq);
#endif
#endif
        *t_ = elapsed;
    }

    /* No check is implemented: the result slots are simply zeroed. */
    if ( check ) {
        dparam[TIMING_ANORM] = 0.0;
        dparam[TIMING_XNORM] = 0.0;
        dparam[TIMING_BNORM] = 0.0;
        dparam[TIMING_RES]   = 0.0;
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Finalize();
    free(tiles);

    return 0;
}
/*
 * Timing driver for PLASMA_chegv_Tile (itype 1, upper storage, no vectors)
 * on a pair of n-by-n matrices filled by PLASMA_cplghe_Tile.
 *
 * iparam : run settings, read at TIMING_N, TIMING_CHECK, TIMING_THRDNBR,
 *          TIMING_NB and TIMING_IB.  Dynamic scheduling is always used.
 * dparam : not written; no result check is implemented here.
 * t_     : receives the cWtime() difference measured around
 *          PLASMA_chegv_Tile.
 *
 * Returns 0.  The process exits with status 0 if a buffer cannot be
 * allocated.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    PLASMA_Complex32_t *AT, *BT, *Q = NULL;
    float *W;
    PLASMA_desc *descA, *descB, *descQ, *descT;
    real_Double_t t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    int lda   = n;
    int itype = 1;
    int vec   = PlasmaNoVec;   /* fixed here, so Q stays NULL */
    int uplo  = PlasmaUpper;

    (void)dparam;

    /* Initialize Plasma; the scheduler choice from iparam is not consulted */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

    /* Autotuning is disabled; block sizes come from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);   /* tiles per dimension, rounded up */

    /* Allocate Data */
    AT = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
    BT = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
    W  = (float *)malloc(n*sizeof(float));
    if (vec == PlasmaVec){
        Q = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t));
        if ( (!Q) ) {
            printf("Out of Memory -Q-\n ");
            exit(0);
        }
    }

    /* Check if unable to allocate memory */
    if ( (!AT) || (!BT) || (!W) ) {
        printf("Out of Memory -\n ");
        exit(0);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_cplghe_Tile((float)0.0, descA, 51 );

    PLASMA_Desc_Create(&descB, BT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);
    PLASMA_cplghe_Tile((float)n, descB, 51 );

    /* descQ is created even when Q is NULL, and destroyed below */
    PLASMA_Desc_Create(&descQ, Q, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n);

    /* Placeholder: nothing is saved for a later check. */
    if ( check ) {
    }

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_chegv(n, n, &descT);

    t = -cWtime();
    PLASMA_chegv_Tile( itype, vec, uplo, descA, descB, W, descT, descQ );
    t += cWtime();
    *t_ = t;

    /* Placeholder: no solution check is implemented. */
    if ( check ) {
    }

    /* DeAllocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descQ);

    if (vec == PlasmaVec)
        free( Q );
    free( AT );
    free( BT );   /* was never released */
    free( W );
    PLASMA_Finalize();

    return 0;
}