int main () { int cores = 2; int N = 10 ; int LDA = 10 ; int info; int info_factorization; int i,j; int NminusOne = N-1; PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDA*sizeof(PLASMA_Complex32_t)); float *D = (float *)malloc(LDA*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialize */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 for Symmetric Positive Matrix */ LAPACKE_slarnv_work(IONE, ISEED, LDA, D); claghe(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++){ A1[LDA*i+i] = A1[LDA*i+i]+ (PLASMA_Complex32_t)N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Plasma routines */ PLASMA_cpotrf(PlasmaUpper, N, A2, LDA); /* Check the factorization */ info_factorization = check_factorization( N, A1, A2, LDA, PlasmaUpper); if ((info_factorization != 0)|(info != 0)) printf("-- Error in CPOTRF example ! \n"); else printf("-- Run of CPOTRF example successful ! \n"); free(A1); free(A2); free(WORK); free(D); PLASMA_Finalize(); exit(0); }
int main () { int cores = 2; int N = 10 ; int LDA = 10 ; int NRHS = 5 ; int LDB = 10 ; int info; int info_solution; int i,j; int NminusOne = N-1; int LDBxNRHS = LDB*NRHS; PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDA*sizeof(PLASMA_Complex32_t)); float *D = (float *)malloc(LDA*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)) { printf("Out of Memory \n "); exit(0); } /* Plasma Initialize */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 for Symmetric Positive Matrix */ LAPACKE_slarnv_work(IONE, ISEED, LDA, D); claghe(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++) { A1[LDA*i+i] = A1[LDA*i+i]+ (PLASMA_Complex32_t)N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Initialize B1 and B2 */ LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA routines */ info = PLASMA_cpotrf(PlasmaLower, N, A2, LDA); info = PLASMA_ctrsm(PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB); info = PLASMA_ctrsm(PlasmaLeft, PlasmaLower, PlasmaConjTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in CTRSM example ! \n"); else printf("-- Run of CTRSM example successful ! 
\n"); free(A1); free(A2); free(B1); free(B2); free(WORK); free(D); PLASMA_Finalize(); exit(0); }
int testing_cher2k(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("HER2K", "alpha beta M N LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - N : number of columns and rows of matrix C and number of row of matrix A and B\n" " - K : number of columns of matrix A and B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex32_t alpha = (PLASMA_Complex32_t) atol(argv[0]); float beta = (float) atol(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int NKmax = max(N, K); int NminusOne = N - 1; float eps; int info_solution; int info, u, t; size_t LDAxK = LDA*NKmax; size_t LDBxK = LDB*NKmax; size_t LDCxN = LDC*N; PLASMA_Complex32_t *A = (PLASMA_Complex32_t *)malloc(LDAxK*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B = (PLASMA_Complex32_t *)malloc(LDBxK*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *C = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Cinit = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Cfinal = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDC*sizeof(PLASMA_Complex32_t)); float *D = (float *) malloc(LDC *sizeof(float)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Cinit) || (!Cfinal) || (!D) ){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_slamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA CHER2K ROUTINE ------- \n"); printf(" Size of the Matrix C %d by %d\n", N, K); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 
10.\n"); /*---------------------------------------------------------- * TESTING CHER2K */ /* Initialize A,B */ LAPACKE_clarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_clarnv_work(IONE, ISEED, LDBxK, B); /* Initialize C */ LAPACKE_slarnv_work(IONE, ISEED, LDC, D); claghe(&N, &NminusOne, D, C, &LDC, ISEED, WORK, &info); free(D); free(WORK); for (u=0; u<2; u++) { for (t=0; t<3; t++) { if (trans[t] == PlasmaTrans) continue; memcpy(Cinit, C, LDCxN*sizeof(PLASMA_Complex32_t)); memcpy(Cfinal, C, LDCxN*sizeof(PLASMA_Complex32_t)); /* PLASMA CHER2K */ PLASMA_cher2k(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING CHER2K (%5s, %s) ........... PASSED !\n", uplostr[u], transstr[t]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING CHER2K (%5s, %s) ... FAILED !\n", uplostr[u], transstr[t]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
/*
 * Timing driver for PLASMA_sgemm_Tile on n-by-n matrices.
 *
 * iparam : run parameters (TIMING_N, TIMING_NB, TIMING_THRDNBR,
 *          TIMING_SCHEDULER, TIMING_CHECK are read here).
 * dparam : when checking is enabled, receives the residual in TIMING_RES and
 *          the norms written by s_check_gemm().
 * t_     : receives the elapsed time of the PLASMA_sgemm_Tile call.
 *
 * Returns 0.  An allocation failure prints "Out of Memory" and terminates
 * the process with exit(0), as the original code did.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *AT, *BT, *CT;
    float *A = NULL, *B = NULL, *C1 = NULL, *C2 = NULL;
    float alpha, beta;
    PLASMA_desc *descA, *descB, *descC;
    real_Double_t t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int check = iparam[TIMING_CHECK];
    int lda   = n;
    size_t tile_bytes;    /* size of one matrix in tile storage   */
    size_t lapack_bytes;  /* size of one matrix in LAPACK storage */

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: the tile size comes from iparam */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    /* Byte counts are computed in size_t so the product cannot overflow int */
    tile_bytes   = (size_t)nt * (size_t)nt * (size_t)nb2 * sizeof(float);
    lapack_bytes = (size_t)n * (size_t)lda * sizeof(float);

    /* Allocate Data */
    AT = (float *)malloc(tile_bytes);
    BT = (float *)malloc(tile_bytes);
    CT = (float *)malloc(tile_bytes);

    /* Check if unable to allocate memory */
    if ( (!AT) || (!BT) || (!CT) ) {
        printf("Out of Memory \n ");
        exit(0);
    }

#if defined(PLASMA_CUDA)
    cudaHostRegister(AT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
    cudaHostRegister(BT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
    cudaHostRegister(CT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable);
#endif

    /* Initialize Data */
    LAPACKE_slarnv_work(1, ISEED, 1, &alpha);
    LAPACKE_slarnv_work(1, ISEED, 1, &beta);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, AT);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, BT);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, CT);

    /* Wrap the tile buffers in descriptors */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_Desc_Create(&descB, BT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    PLASMA_Desc_Create(&descC, CT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);

    /* Keep the initial C in LAPACK layout for the check */
    if (check) {
        C2 = (float *)malloc(lapack_bytes);
        if ( !C2 ) {
            printf("Out of Memory \n ");
            exit(0);
        }
        PLASMA_Tile_to_Lapack(descC, (void*)C2, n);
    }

#if defined(PLASMA_CUDA)
    core_cublas_init();
#endif

    t = -cWtime();
    PLASMA_sgemm_Tile( PlasmaNoTrans, PlasmaNoTrans, alpha, descA, descB, beta, descC );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        /* Each tile buffer is released as soon as its LAPACK copy exists */
        A = (float *)malloc(lapack_bytes);
        if ( !A ) {
            printf("Out of Memory \n ");
            exit(0);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
        free(AT);

        B = (float *)malloc(lapack_bytes);
        if ( !B ) {
            printf("Out of Memory \n ");
            exit(0);
        }
        PLASMA_Tile_to_Lapack(descB, (void*)B, n);
        free(BT);

        C1 = (float *)malloc(lapack_bytes);
        if ( !C1 ) {
            printf("Out of Memory \n ");
            exit(0);
        }
        PLASMA_Tile_to_Lapack(descC, (void*)C1, n);
        free(CT);

        dparam[TIMING_RES] = s_check_gemm( PlasmaNoTrans, PlasmaNoTrans, n, n, n,
                                           alpha, A, lda, B, lda, beta, C1, C2, lda,
                                           &(dparam[TIMING_ANORM]),
                                           &(dparam[TIMING_BNORM]),
                                           &(dparam[TIMING_XNORM]));

        /* The LAPACK-layout copies were previously leaked (only C2 was freed) */
        free(A);
        free(B);
        free(C1);
        free(C2);
    }
    else {
        free( AT );
        free( BT );
        free( CT );
    }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descC);

    PLASMA_Finalize();

    return 0;
}
/*
 * Timing driver for PLASMA_sgesv_Tile (n-by-n system, nrhs right-hand sides).
 *
 * iparam : run parameters (TIMING_N, TIMING_NB, TIMING_IB, TIMING_NRHS,
 *          TIMING_THRDNBR, TIMING_SCHEDULER, TIMING_CHECK are read here).
 * dparam : when checking is enabled, receives the residual in TIMING_RES and
 *          the norms written by s_check_solution().
 * t_     : receives the elapsed time of the PLASMA_sgesv_Tile call.
 *
 * Returns 0 on success and -1 when a buffer cannot be allocated; in both
 * cases every buffer is released and PLASMA is finalized.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *AT, *bT;
    float *A = NULL;
    float *b = NULL;
    float *x = NULL;
    PLASMA_desc *descA, *descB;
    real_Double_t t;
    int *piv;
    int rc    = 0;
    int n     = iparam[TIMING_N];
    int nb    = iparam[TIMING_NB];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = n;
    int ldb   = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: block sizes come from iparam */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate Data */
    AT  = (float *)malloc(lda*n   *sizeof(float));
    bT  = (float *)malloc(ldb*nrhs*sizeof(float));
    piv = (int *)malloc( n*sizeof(int));

    /* Check if unable to allocate memory.  Nothing else has been created
     * yet, so release what was obtained and shut PLASMA down. */
    if ( (!AT) || (!bT) || (!piv) ) {
        printf("Out of Memory \n ");
        free( AT );
        free( bT );
        free( piv );
        PLASMA_Finalize();
        return -1;
    }

    /* Wrap AT and bT in descriptors, then fill them with random values */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, lda, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaRealFloat, nb, nb, nb*nb, ldb, nrhs, 0, 0, n, nrhs);
    LAPACKE_slarnv_work(1, ISEED, lda*n,    AT);
    LAPACKE_slarnv_work(1, ISEED, ldb*nrhs, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (float *)malloc(lda*n   *sizeof(float));
        b = (float *)malloc(ldb*nrhs*sizeof(float));
        x = (float *)malloc(ldb*nrhs*sizeof(float));
        if ( (!A) || (!b) || (!x) ) {
            printf("Out of Memory \n ");
            rc = -1;
            goto cleanup;
        }
        PLASMA_sTile_to_Lapack(descA, (void*)A, lda);
        PLASMA_sTile_to_Lapack(descB, (void*)b, ldb);
    }

    t = -cWtime();
    PLASMA_sgesv_Tile( descA, piv, descB );
    t += cWtime();
    *t_ = t;

    /* Check the solution: descB now holds x */
    if ( check ) {
        PLASMA_sTile_to_Lapack(descB, (void*)x, ldb);

        dparam[TIMING_RES] = s_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
    }

cleanup:
    /* free(NULL) is a no-op, so the check buffers can be released blindly */
    free( A );
    free( b );
    free( x );

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);

    free( AT );
    free( bT );
    free( piv );
    PLASMA_Finalize();

    return rc;
}
/*
 * Timing driver for PLASMA_sgetrf on an m-by-n matrix.
 *
 * iparam : run parameters (TIMING_M, TIMING_N, TIMING_NRHS, TIMING_NB,
 *          TIMING_IB, TIMING_THRDNBR, TIMING_SCHEDULER, TIMING_CHECK).
 * dparam : when checking is enabled and m == n, receives the residual in
 *          TIMING_RES and the norms written by s_check_solution().
 * t_     : receives the elapsed time of the PLASMA_sgetrf call.
 *
 * Returns 0 on success and -1 when a buffer cannot be allocated; every
 * buffer obtained so far is released on both paths.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *A, *Acpy = NULL, *b = NULL, *x = NULL;
    real_Double_t t;
    int *piv;
    int rc    = 0;
    int m     = iparam[TIMING_M];
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = m;
    int ldb   = m;

    /* Allocate Data */
    A   = (float *)malloc(lda*n*sizeof(float));
    piv = (int *)malloc( min(m, n) * sizeof(int));

    /* Check if unable to allocate memory; release whichever one succeeded */
    if ( !A || !piv ){
        printf("Out of Memory \n ");
        free( A );
        free( piv );
        return -1;
    }

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: block sizes come from iparam */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Initialize Data */
    PLASMA_splrnt(m, n, A, lda, 3456);

    /* Save A in lapack layout for check (only square systems are checked) */
    if ( check && (m == n) ) {
        Acpy = (float *)malloc(lda*n*sizeof(float));
        b    = (float *)malloc(ldb*nrhs *sizeof(float));
        x    = (float *)malloc(ldb*nrhs *sizeof(float));
        if ( !Acpy || !b || !x ) {
            printf("Out of Memory \n ");
            rc = -1;
            goto cleanup;
        }
        LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', m, n, A, lda, Acpy, lda);
    }

    t = -cWtime();
    PLASMA_sgetrf( m, n, A, lda, piv );
    t += cWtime();
    *t_ = t;

    /* Check the solution: solve with a random right-hand side */
    if ( check && (m == n) ) {
        LAPACKE_slarnv_work(1, ISEED, ldb*nrhs, x);
        LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', n, nrhs, x, ldb, b, ldb);

        PLASMA_sgetrs( PlasmaNoTrans, n, nrhs, A, lda, piv, x, ldb );

        dparam[TIMING_RES] = s_check_solution(m, n, nrhs, Acpy, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
    }

cleanup:
    /* free(NULL) is a no-op, so the check buffers can be released blindly */
    free( Acpy );
    free( b );
    free( x );

    free( A );
    free( piv );
    PLASMA_Finalize();

    return rc;
}
/*
 * High-level entry point for slarnv.
 *
 * All four arguments are forwarded unchanged to LAPACKE_slarnv_work() and
 * its return value is handed back to the caller.
 */
lapack_int LAPACKE_slarnv( lapack_int idist, lapack_int* iseed, lapack_int n,
                           float* x )
{
    const lapack_int status = LAPACKE_slarnv_work( idist, iseed, n, x );

    return status;
}
/*
 * Timing driver for PLASMA_sgeqrf_Tile on an n-by-n matrix.
 *
 * iparam : run parameters (TIMING_N, TIMING_NRHS, TIMING_NB, TIMING_IB,
 *          TIMING_THRDNBR, TIMING_SCHEDULER, TIMING_CHECK are read here).
 * dparam : when checking is enabled, receives the residual in TIMING_RES and
 *          the norms written by s_check_solution().
 * t_     : receives the elapsed time of the PLASMA_sgeqrf_Tile call.
 *
 * Returns 0.  An allocation failure prints "Out of Memory" and terminates
 * the process with exit(0), as the original code did.
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    float *A = NULL, *AT, *b = NULL, *bT, *x;
    PLASMA_desc *descA, *descB, *descT;
    real_Double_t t;
    int nb, nb2, nt, ntrhs;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = n;
    int ldb   = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: block sizes come from iparam */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);

    /* Number of tile columns needed for the right-hand sides.  The original
     * code always reserved a single tile column, which is too small as soon
     * as nrhs > nb.  At least one column is kept so that nrhs <= nb behaves
     * exactly as before. */
    ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);
    if ( ntrhs < 1 )
        ntrhs = 1;

    /* Allocate Data */
    AT = (float *)malloc(nt*nt*nb2*sizeof(float));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_sgels_Tile(n, n, &descT);

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (float *)malloc(lda*n    *sizeof(float));
        if ( !A ) {
            printf("Out of Memory \n ");
            exit(0);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_sgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        b  = (float *)malloc(ldb*nrhs *sizeof(float));
        bT = (float *)malloc((size_t)nt*ntrhs*nb2 *sizeof(float));
        x  = (float *)malloc(ldb*nrhs *sizeof(float));
        if ( (!b) || (!bT) || (!x) ) {
            printf("Out of Memory \n ");
            exit(0);
        }

        LAPACKE_slarnv_work(1, ISEED, nt*ntrhs*nb2, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealFloat, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        PLASMA_sgeqrs_Tile( descA, descT, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = s_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
    }

    /* Release the workspace and the descriptor */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PLASMA_Desc_Destroy(&descA);

    free( AT );
    PLASMA_Finalize();

    return 0;
}
int main () { int cores = 2; int N = 10; int LDA = 10; int NRHS = 5; int LDB = 10; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; float *A1 = (float *)malloc(LDA*N*(sizeof*A1)); float *A2 = (float *)malloc(LDA*N*(sizeof*A2)); float *B1 = (float *)malloc(LDB*NRHS*(sizeof*B1)); float *B2 = (float *)malloc(LDB*NRHS*(sizeof*B2)); float *L; int *IPIV; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /*Plasma Initialize*/ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 Matrix */ LAPACKE_slarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_slarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* Allocate L and IPIV */ info = PLASMA_Alloc_Workspace_sgetrf_incpiv(N, N, &L, &IPIV); /* LU factorization of the matrix A */ info = PLASMA_sgetrf_incpiv(N, N, A2, LDA, L, IPIV); /* Solve the problem */ info = PLASMA_sgetrs_incpiv(PlasmaNoTrans, N, NRHS, A2, LDA, L, IPIV, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in SGETRS example ! \n"); else printf("-- Run of SGETRS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L); PLASMA_Finalize(); exit(0); }
int testing_strsm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 5 ) { USAGE("TRSM", "alpha M N LDA LDB", " - alpha : alpha coefficient\n" " - M : number of rows of matrices B\n" " - N : number of columns of matrices B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n"); return -1; } float alpha = (float) atol(argv[0]); int M = atoi(argv[1]); int N = atoi(argv[2]); int LDA = atoi(argv[3]); int LDB = atoi(argv[4]); float eps; int info_solution; int s, u, t, d, i; int LDAxM = LDA*max(M,N); int LDBxN = LDB*max(M,N); float *A = (float *)malloc(LDAxM*sizeof(float)); float *B = (float *)malloc(LDBxN*sizeof(float)); float *Binit = (float *)malloc(LDBxN*sizeof(float)); float *Bfinal = (float *)malloc(LDBxN*sizeof(float)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Binit) || (!Bfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_slamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA STRSM ROUTINE ------- \n"); printf(" Size of the Matrix B : %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING STRSM */ /* Initialize A, B, C */ LAPACKE_slarnv_work(IONE, ISEED, LDAxM, A); LAPACKE_slarnv_work(IONE, ISEED, LDBxN, B); for(i=0;i<max(M,N);i++) A[LDA*i+i] = A[LDA*i+i] + 2.0; for (s=0; s<2; s++) { for (u=0; u<2; u++) { #ifdef COMPLEX for (t=0; t<3; t++) { #else for (t=0; t<2; t++) { #endif for (d=0; d<2; d++) { memcpy(Binit, B, LDBxN*sizeof(float)); memcpy(Bfinal, B, LDBxN*sizeof(float)); /* PLASMA STRSM */ PLASMA_strsm(side[s], uplo[u], trans[t], diag[d], M, N, alpha, A, LDA, Bfinal, LDB); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], trans[t], diag[d], M, N, alpha, 
A, LDA, Binit, Bfinal, LDB); printf("***************************************************\n"); if (info_solution == 0) { printf(" ---- TESTING STRSM (%s, %s, %s, %s) ...... PASSED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } else { printf(" ---- TESTING STRSM (%s, %s, %s, %s) ... FAILED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } printf("***************************************************\n"); } } } } free(A); free(B); free(Binit); free(Bfinal); return 0; } /*-------------------------------------------------------------- * Check the solution */ static int check_solution(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum trans, PLASMA_enum diag, int M, int N, float alpha, float *A, int LDA, float *Bref, float *Bplasma, int LDB) { int info_solution; float Anorm, Binitnorm, Bplasmanorm, Blapacknorm, Rnorm, result; float eps; float mzone = (float)-1.0; float *work = (float *)malloc(max(M, N)* sizeof(float)); int Am, An; if (side == PlasmaLeft) { Am = M; An = M; } else { Am = N; An = N; } Anorm = LAPACKE_slantr_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), lapack_const(uplo), lapack_const(diag), Am, An, A, LDA, work); Binitnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); Bplasmanorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bplasma, LDB, work); cblas_strsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, N, (alpha), A, LDA, Bref, LDB); Blapacknorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); cblas_saxpy(LDB * N, (mzone), Bplasma, 1, Bref, 1); Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Binitnorm %e, Bplasmanorm %e, Blapacknorm %e\n", Rnorm, Anorm, Binitnorm, Bplasmanorm, Blapacknorm); result = Rnorm / ((Anorm + Blapacknorm) * max(M,N) * eps); 
printf("============\n"); printf("Checking the norm of the difference against reference STRSM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo).N.eps) = %e \n", result); if ( isinf(Blapacknorm) || isinf(Bplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int testing_csymm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("SYMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient \n" " - beta : beta coefficient \n" " - M : number of rows of matrices A and C \n" " - N : number of columns of matrices B and C \n" " - LDA : leading dimension of matrix A \n" " - LDB : leading dimension of matrix B \n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex32_t alpha = (PLASMA_Complex32_t) atol(argv[0]); PLASMA_Complex32_t beta = (PLASMA_Complex32_t) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int MNmax = max(M, N); int MminusOne = MNmax - 1; float eps; int info_solution; int i, j, s, u, info; int LDAxM = LDA*max(M, N); int LDBxN = LDB*N; int LDCxN = LDC*N; PLASMA_Complex32_t *A = (PLASMA_Complex32_t *)malloc(LDAxM*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B = (PLASMA_Complex32_t *)malloc(LDBxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *C = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Cinit = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Cfinal = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDC*sizeof(PLASMA_Complex32_t)); float *D = (float *) malloc(LDC *sizeof(float)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_slamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA CSYMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); 
/*---------------------------------------------------------- * TESTING CSYMM */ /* Initialize A */ LAPACKE_slarnv_work(IONE, ISEED, LDC, D); claghe(&MNmax, &MminusOne, D, A, &LDA, ISEED, WORK, &info); free(D); free(WORK); /* Initialize B */ LAPACKE_clarnv_work(IONE, ISEED, LDBxN, B); /* Initialize C */ LAPACKE_clarnv_work(IONE, ISEED, LDCxN, C); for (s=0; s<2; s++) { for (u=0; u<2; u++) { /* Initialize Cinit / Cfinal */ for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA CSYMM */ PLASMA_csymm(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING CSYMM (%5s, %5s) ....... PASSED !\n", sidestr[s], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING CSYMM (%s, %s) ... FAILED !\n", sidestr[s], uplostr[u]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
int main () { int cores = 2; int N = 10; int LDA = 10; int NRHS = 5; int LDB = 10; int info; int info_solution; int i,j; int NminusOne = N-1; int LDBxNRHS = LDB*NRHS; float *A1 = (float *)malloc(LDA*N*sizeof(float)); float *A2 = (float *)malloc(LDA*N*sizeof(float)); float *B1 = (float *)malloc(LDB*NRHS*sizeof(float)); float *B2 = (float *)malloc(LDB*NRHS*sizeof(float)); float *WORK = (float *)malloc(2*LDA*sizeof(float)); float *D = (float *)malloc(LDA*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialize */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /*------------------------------------------------------------- * TESTING SPOSV */ /* Initialize A1 and A2 for Symmetric Positif Matrix */ LAPACKE_slarnv_work(IONE, ISEED, LDA, D); slagsy(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++){ A1[LDA*i+i] = A1[LDA*i+i] + (float)N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Initialize B1 and B2 */ LAPACKE_slarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA SPOSV */ info = PLASMA_sposv(PlasmaUpper, N, NRHS, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in SPOSV example ! \n"); else printf("-- Run of SPOSV example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(WORK); free(D); PLASMA_Finalize(); exit(0); }
int main () { int cores = 2; int M = 15; int N = 10; int LDA = 15; int NRHS = 5; int LDB = 15; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; float *A1 = (float *)malloc(LDA*N*sizeof(float)); float *A2 = (float *)malloc(LDA*N*sizeof(float)); float *B1 = (float *)malloc(LDB*NRHS*sizeof(float)); float *B2 = (float *)malloc(LDB*NRHS*sizeof(float)); float *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_sgeqrf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_slarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Initialize B1 and B2 */ LAPACKE_slarnv_work(IONE, ISEED, LDBxNRHS, B1); for (i = 0; i < M; i++) for (j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_sgeqrf(M, N, A2, LDA, T); /* Solve the problem */ info = PLASMA_sormqr(PlasmaLeft, PlasmaTrans, M, NRHS, N, A2, LDA, T, B2, LDB); info = PLASMA_strsm(PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, (float)1.0, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in SORMQR example ! \n"); else printf("-- Run of SORMQR example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(T); PLASMA_Finalize(); exit(0); }
int testing_sgemm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 8) { USAGE("GEMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - M : number of rows of matrices A and C\n" " - N : number of columns of matrices B and C\n" " - K : number of columns of matrix A / number of rows of matrix B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } float alpha = (float) atol(argv[0]); float beta = (float) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int K = atoi(argv[4]); int LDA = atoi(argv[5]); int LDB = atoi(argv[6]); int LDC = atoi(argv[7]); float eps; int info_solution; int i, j, ta, tb; int LDAxK = LDA*max(M,K); int LDBxN = LDB*max(K,N); int LDCxN = LDC*N; float *A = (float *)malloc(LDAxK*sizeof(float)); float *B = (float *)malloc(LDBxN*sizeof(float)); float *C = (float *)malloc(LDCxN*sizeof(float)); float *Cinit = (float *)malloc(LDCxN*sizeof(float)); float *Cfinal = (float *)malloc(LDCxN*sizeof(float)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_slamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA SGEMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING SGEMM */ /* Initialize A, B, C */ LAPACKE_slarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_slarnv_work(IONE, ISEED, LDBxN, B); LAPACKE_slarnv_work(IONE, ISEED, LDCxN, C); #ifdef COMPLEX for (ta=0; ta<3; ta++) { for (tb=0; tb<3; tb++) { #else for (ta=0; ta<2; ta++) { for (tb=0; tb<2; tb++) { #endif for ( 
i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA SGEMM */ PLASMA_sgemm(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING SGEMM (%s, %s) ............... PASSED !\n", transstr[ta], transstr[tb]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING SGEMM (%s, %s) ... FAILED !\n", transstr[ta], transstr[tb]); printf("************************************************\n"); } } } #ifdef _UNUSED_ }} #endif free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
/*
 * Timing driver for PLASMA_sgelqf_Tile on an M-by-N matrix.
 *
 * iparam : run parameters (TIMING_M, TIMING_N, TIMING_NB, TIMING_IB,
 *          TIMING_THRDNBR and TIMING_SCHEDULER are read here).
 * dparam : not used by this driver.
 * t_     : receives the elapsed time of the PLASMA_sgelqf_Tile call.
 *
 * Returns 0.  When the matrix buffer cannot be allocated, "Out of Memory" is
 * printed and the process terminates through exit(0).
 */
static int RunTest(int *iparam, float *dparam, real_Double_t *t_)
{
    const int rows = iparam[TIMING_M];
    const int cols = iparam[TIMING_N];
    const int ld   = rows;
    int tile;
    float *matrix;
    PLASMA_desc *descA, *descT;
    real_Double_t start;

    /* Bring PLASMA up and select the scheduler requested by the caller */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE,
               iparam[TIMING_SCHEDULER] ? PLASMA_DYNAMIC_SCHEDULING
                                        : PLASMA_STATIC_SCHEDULING );

    /* Autotuning stays off: both block sizes are taken from iparam */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    tile = iparam[TIMING_NB];

    /* Householder mode: tree reduction with a size of 4 */
    PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_TREE_HOUSEHOLDER);
    PLASMA_Set(PLASMA_HOUSEHOLDER_SIZE, 4);

    /* Matrix storage */
    matrix = (float *)malloc(ld*cols*sizeof(float));
    if ( matrix == NULL ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Describe the buffer to PLASMA, then fill it with random values */
    PLASMA_Desc_Create(&descA, matrix, PlasmaRealFloat, tile, tile, tile*tile,
                       rows, cols, 0, 0, rows, cols);
    LAPACKE_slarnv_work(1, ISEED, ld*cols, matrix);

    /* Workspace for the factorization */
    PLASMA_Alloc_Workspace_sgels_Tile(rows, cols, &descT);

    /* Timed section */
    start = cWtime();
    PLASMA_sgelqf_Tile( descA, descT );
    *t_ = cWtime() - start;

    /* Release the workspace, the descriptor and the matrix */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PLASMA_Desc_Destroy(&descA);
    free( matrix );

    PLASMA_Finalize();

    return 0;
}