int check_factorization(int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, int uplo) { float Anorm, Rnorm; PLASMA_Complex32_t alpha; int info_factorization; int i,j; float eps; eps = LAPACKE_slamch_work('e'); PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *L1 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *L2 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); float *work = (float *)malloc(N*sizeof(float)); memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex32_t)); memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex32_t)); alpha= 1.0; LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ if (uplo == PlasmaUpper){ LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N); cblas_ctrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } else{ LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N); cblas_ctrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } /* Compute the Residual || A -L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
void CORE_clacpy(PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB) { LAPACKE_clacpy_work( LAPACK_COL_MAJOR, lapack_const(uplo), M, N, A, LDA, B, LDB); }
void CORE_clacpy_quark(Quark *quark) { PLASMA_enum uplo; int M; int N; PLASMA_Complex32_t *A; int LDA; PLASMA_Complex32_t *B; int LDB; quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB); LAPACKE_clacpy_work( LAPACK_COL_MAJOR, lapack_const(uplo), M, N, A, LDA, B, LDB); }
static int RunTest(int *iparam, _PREC *dparam, real_Double_t *t_) { PLASMA_Complex32_t *A, *Acpy = NULL; real_Double_t t; int n = iparam[TIMING_N]; int nb = iparam[TIMING_NB]; int check = iparam[TIMING_CHECK]; n = ((n % nb) == 0) ? (n / nb) * nb : ((n / nb) + 1) * nb ; dparam[TIMING_ANORM] = (_PREC)n; dparam[TIMING_BNORM] = (_PREC)_FADDS; /* Allocate Data */ A = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t)); /* Check if unable to allocate memory */ if ( (!A) ) { printf("Out of Memory \n "); exit(0); } /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); /* } */ /* Initialiaze Data */ LAPACKE_clarnv_work(1, ISEED, n*n, A); /* Save A and b */ if (check) { Acpy = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t)); LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), n, n, A, n, Acpy, n); } t = -cWtime(); PLASMA_cgecfi( n, n, A, PlasmaCM, n, 1, PlasmaCCRB, nb, nb); t += cWtime(); *t_ = t; /* Check the solution */ if (check) { dparam[TIMING_RES] = (_PREC)c_check_conversion(n, n, n, 1, nb, nb, Acpy, A, map_CM, map_CCRB); free(Acpy); } free( A ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; PLASMA_Complex32_t *A, *AT, *A2 = NULL; PLASMA_desc *descA; real_Double_t t; int *ipiv, *ipiv2 = NULL; int i; int nb = iparam[TIMING_NB]; int m = iparam[TIMING_N]; int n = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = m; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* Allocate Data */ A = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); AT = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); /* Check if unable to allocate memory */ if ( ( !AT ) || (! A) ) { printf("Out of Memory \n "); return -1; } /* Initialiaze Data */ LAPACKE_clarnv_work(1, ISEED, lda*n, A); /* for(i=0; i<n; i++) { */ /* A[i*lda+i] += (float)m; */ /* } */ PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, m, n); PLASMA_cLapack_to_Tile((void*)A, lda, descA); /* Allocate Workspace */ ipiv = (int *)malloc( n*sizeof(int) ); /* Save AT in lapack layout for check */ if ( check ) { A2 = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); ipiv2 = (int *)malloc( n*sizeof(int) ); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda); LAPACKE_cgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 ); } plasma = plasma_context_self(); PLASMA_Sequence_Create(&sequence); QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] ); plasma_dynamic_spawn(); CORE_cgetrf_rectil_init(); t = -cWtime(); QUARK_CORE_cgetrf_rectil(plasma->quark, &task_flags, *descA, AT, descA->mb*descA->nb, ipiv, sequence, &request, 0, 0, iparam[TIMING_THRDNBR]); PLASMA_Sequence_Wait(sequence); t += cWtime(); *t_ = t; PLASMA_Sequence_Destroy(sequence); /* Check the solution */ if ( check ) { float *work = (float *)malloc(max(m,n)*sizeof(float)); PLASMA_cTile_to_Lapack(descA, (void*)A, lda); /* Check ipiv */ for(i=0; i<n; i++) { if( ipiv[i] != ipiv2[i] ) { fprintf(stderr, "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n", i, ipiv[i], i, crealf(A[ i * lda + i ]), i, ipiv2[i], i, crealf(A2[ i * lda + i ])); break; } } dparam[TIMING_ANORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A, lda, work); dparam[TIMING_XNORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); dparam[TIMING_BNORM] = 0.0; CORE_caxpy( m, n, -1.0, A, lda, A2, lda); dparam[TIMING_RES] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); free( A2 ); free( ipiv2 ); free( work ); } /* Deallocate Workspace */ PLASMA_Desc_Destroy(&descA); free( A ); free( AT ); free( ipiv ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, _PREC *dparam, real_Double_t *t_) { PLASMA_Complex32_t *A = NULL, *AT; PLASMA_desc *descA; real_Double_t t; int n = iparam[TIMING_N]; int nb = iparam[TIMING_NB]; int check = iparam[TIMING_CHECK]; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); /* } */ n = ((n % nb) == 0) ? (n / nb) * nb : ((n / nb) + 1) * nb ; dparam[TIMING_ANORM] = (_PREC)n; /* Allocate Data */ AT = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t)); /* Check if unable to allocate memory */ if ( (!AT) ) { printf("Out of Memory \n "); exit(0); } /* Initialiaze Data */ PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, n, n, 0, 0, n, n); LAPACKE_clarnv_work(1, ISEED, n*n, AT); /* Save A and b */ if (check) { A = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t)); LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), n, n, AT, n, A, n); } t = -cWtime(); PLASMA_Lapack_to_Tile( (void *)A, n, descA); t += cWtime(); *t_ = t; /* Check the solution */ if (check) { dparam[TIMING_RES] = (_PREC)c_check_conversion(n, n, n, 1, nb, nb, A, AT, map_CM, map_CCRB); free(A); } PLASMA_Desc_Destroy(&descA); free( AT ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { PLASMA_Complex32_t *A, *Acpy = NULL, *L, *b, *x; real_Double_t t; int *piv; int n = iparam[TIMING_N]; int nrhs = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = n; int ldb = n; /* Allocate Data */ A = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); /* Check if unable to allocate memory */ if ( !A ){ printf("Out of Memory \n "); exit(0); } /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */ /* } */ /* Initialiaze Data */ LAPACKE_clarnv_work(1, ISEED, n*lda, A); /* Allocate Workspace */ PLASMA_Alloc_Workspace_cgesv_incpiv(n, &L, &piv); /* Save AT in lapack layout for check */ if ( check ) { Acpy = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', n, n, A, lda, Acpy, lda); } t = -cWtime(); PLASMA_cgetrf_incpiv( n, n, A, lda, L, piv ); t += cWtime(); *t_ = t; /* Check the solution */ if ( check ) { b = (PLASMA_Complex32_t *)malloc(ldb*nrhs *sizeof(PLASMA_Complex32_t)); x = (PLASMA_Complex32_t *)malloc(ldb*nrhs *sizeof(PLASMA_Complex32_t)); LAPACKE_clarnv_work(1, ISEED, ldb*nrhs, x); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, x, ldb, b, ldb); PLASMA_cgetrs_incpiv( PlasmaNoTrans, n, nrhs, A, lda, L, piv, x, ldb ); dparam[TIMING_RES] = c_check_solution(n, n, nrhs, Acpy, lda, b, x, ldb, &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), &(dparam[TIMING_XNORM])); free( Acpy ); free( b ); free( x ); } free( A ); free( L ); free( piv ); PLASMA_Finalize(); return 0; }
static int check_factorization(int M, int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, PLASMA_Complex32_t *Q, float eps ) { float Anorm, Rnorm; PLASMA_Complex32_t alpha, beta; int info_factorization; int i,j; PLASMA_Complex32_t *Ql = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t)); float *work = (float *)malloc(max(M,N)*sizeof(float)); alpha=1.0; beta=0.0; if (M >= N) { /* Extract the R */ PLASMA_Complex32_t *R = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); memset((void*)R, 0, N*N*sizeof(PLASMA_Complex32_t)); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N); /* Perform Ql=Q*R */ memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex32_t)); cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M); free(R); } else { /* Extract the L */ PLASMA_Complex32_t *L = (PLASMA_Complex32_t *)malloc(M*M*sizeof(PLASMA_Complex32_t)); memset((void*)L, 0, M*M*sizeof(PLASMA_Complex32_t)); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M); /* Perform Ql=LQ */ memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex32_t)); cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M); free(L); } /* Compute the Residual */ for (i = 0; i < M; i++) for (j = 0 ; j < N; j++) Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i]; BLAS_cge_norm( blas_colmajor, blas_inf_norm, M, N, Residual, M, &Rnorm ); BLAS_cge_norm( blas_colmajor, blas_inf_norm, M, N, A2, LDA, &Anorm ); if (M >= N) { printf("============\n"); printf("Checking the QR Factorization \n"); printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); } else { printf("============\n"); printf("Checking the LQ Factorization \n"); printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); } if (isnan(Rnorm / (Anorm * N *eps)) || isinf(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 60.0) ) { printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else { printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(work); free(Ql); free(Residual); return info_factorization; }