/*
 * Symmetric rank-k update kernel, dispatched on *dtype.
 *
 * Both branches call the CBLAS syrk routine with column-major storage,
 * the upper triangle of C, and the transposed form of A, i.e. they
 * request C := alpha * A' * A + beta * C.  The leading dimension of A
 * is passed as *k.
 *
 *   dtype   selects the element type (DTYPE_FLOAT or DTYPE_COMPLEX);
 *           any other value leaves C untouched.
 *   A_, C_  input matrix and in/out matrix, reinterpreted per dtype.
 *   n, k    order of C and the inner dimension, passed by pointer.
 *   ldc     leading dimension of C, passed by pointer.
 *   alpha_, beta_  scalars, reinterpreted per dtype.
 *
 * Note that the DTYPE_FLOAT branch operates on double-precision data.
 */
PYMIC_KERNEL
void mic_syrk(const int64_t *dtype, const void *A_, void *C_,
              const int64_t *n, const int64_t *k, const int64_t *ldc,
              const void *alpha_, const void *beta_)
{
    switch (*dtype) {
    case DTYPE_FLOAT:
        {
            const double *src = (const double *) A_;
            double *dst = (double *) C_;
            const double *scale_a = (const double *) alpha_;
            const double *scale_c = (const double *) beta_;

            /* Real case: the scalars are passed by value. */
            cblas_dsyrk(CblasColMajor, CblasUpper, CblasTrans,
                        *n, *k, *scale_a, src, *k, *scale_c, dst, *ldc);
        }
        break;
    case DTYPE_COMPLEX:
        {
            const double complex *src = (const double complex *) A_;
            double complex *dst = (double complex *) C_;
            const double complex *scale_a = (const double complex *) alpha_;
            const double complex *scale_c = (const double complex *) beta_;

            /* Complex case: the scalars are passed by address. */
            cblas_zsyrk(CblasColMajor, CblasUpper, CblasTrans,
                        *n, *k, scale_a, src, *k, scale_c, dst, *ldc);
        }
        break;
    }
}
/// Double-precision `syrk` overload.
///
/// Forwards every argument, unchanged and in the same order, to
/// `cblas_dsyrk`; C is updated in place.
inline void syrk (CBLAS_ORDER const Order,
                  CBLAS_UPLO const Uplo,
                  CBLAS_TRANSPOSE const Trans,
                  int const N, int const K,
                  double const alpha, double const* A, int const lda,
                  double const beta, double* C, int const ldc)
{
    cblas_dsyrk (Order, Uplo, Trans,
                 N, K,
                 alpha, A, lda,
                 beta, C, ldc);
}
int check_orthogonality(int M, int N, int LDQ, double *Q) { double alpha, beta; double normQ; int info_ortho; int i; int minMN = min(M, N); double eps; double *work = (double *)malloc(minMN*sizeof(double)); eps = LAPACKE_dlamch_work('e'); alpha = 1.0; beta = -1.0; /* Build the idendity matrix USE DLASET?*/ double *Id = (double *) malloc(minMN*minMN*sizeof(double)); memset((void*)Id, 0, minMN*minMN*sizeof(double)); for (i = 0; i < minMN; i++) Id[i*minMN+i] = (double)1.0; /* Perform Id - Q'Q */ if (M >= N) cblas_dsyrk(CblasColMajor, CblasUpper, CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N); else cblas_dsyrk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M); normQ = LAPACKE_dlansy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 'u', minMN, Id, minMN, work); printf("============\n"); printf("Checking the orthogonality of Q \n"); printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps)); if ( isnan(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 10.0) ) { printf("-- Orthogonality is suspicious ! \n"); info_ortho=1; } else { printf("-- Orthogonality is CORRECT ! \n"); info_ortho=0; } free(work); free(Id); return info_ortho; }
/*
 * Symmetric rank-k update on GSL matrices through CBLAS.
 *
 * Calls cblas_dsyrk in row-major mode using each matrix's data pointer and
 * its tda field as the leading dimension.  The order N is the row count of
 * C; the inner dimension K is the column count of A when Trans is
 * CblasNoTrans and the row count of A otherwise.  C is updated in place.
 */
void My_dsyrk(const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans,
              double alpha, const gsl_matrix * A,
              double beta, gsl_matrix * C)
{
    const int K = (Trans == CblasNoTrans) ? A->size2 : A->size1;
    const int N = C->size1;

    cblas_dsyrk(CblasRowMajor, Uplo, Trans,
                N, K,
                alpha, A->data, A->tda,
                beta, C->data, C->tda);
}
// Performs symmetric rank-k update of the submatrix. // Input to this step is the given submatrix and the output of the previous step. int S3_compute::execute(const triple & t, cholesky_context & c ) const { tile_const_ptr_type A_block; tile_const_ptr_type L1_block; tile_const_ptr_type L2_block; double temp; int b = c.b; const int k = t[0]; const int j = t[1]; const int i = t[2]; assert( j != k && i != k ); c.Lkji.get(triple(k, j, i), A_block); // Get the input tile. if(i==j) { // Diagonal tile. c.Lkji.get(triple(k+1,j,k), L2_block); // In case of a diagonal tile, i=j, hence both the tiles are the same. } else { // Non-diagonal tile. c.Lkji.get(triple(k+1,i,k), L2_block); // Get the first tile. c.Lkji.get(triple(k+1,j,k), L1_block); // Get the second tile. } #ifdef USE_MKL const double alpha = -1; const double beta = 1; if(i==j) { // Diagonal tile. cblas_dsyrk( CblasColMajor, CblasLower, CblasNoTrans, b, b, alpha, L2_block->get_array(), b, beta, const_cast< double* >( A_block->get_array() ), b ); } else { cblas_dgemm( CblasColMajor, CblasNoTrans, CblasTrans, b, b, b, alpha, L1_block->get_array(), b, L2_block->get_array(), b, beta, const_cast< double* >( A_block->get_array() ), b ); } #else for(int j_b = 0; j_b < b; j_b++) { for(int k_b = 0; k_b < b; k_b++) { temp = -1 * (*L2_block)( j_b, k_b ); if(i!=j) { for(int i_b = 0; i_b < b; i_b++) { const_cast< tile_type & >(*A_block)( i_b, j_b ) = (*A_block)( i_b, j_b ) + (temp * (*L1_block)( i_b, k_b )); } } else { for(int i_b = j_b; i_b < b; i_b++) { const_cast< tile_type & >(*A_block)( i_b, j_b ) = (*A_block)( i_b, j_b ) + (temp * (*L2_block)( i_b, k_b )); } } } } #endif c.Lkji.put(triple(k+1,j,i),A_block); // Write the output at the next time step. return CnC::CNC_Success; }
/*
 * JNI binding for cblas_dsyrk.
 *
 * A and C are Java buffer objects; their native addresses are obtained with
 * GetDirectBufferAddress and offset by offsetA / offsetC elements (doubles).
 * All other arguments are forwarded to cblas_dsyrk unchanged.
 *
 * beta is declared jdouble, like alpha: this is the double-precision routine,
 * and a jfloat parameter here would misread a double passed by the JVM.
 * NOTE(review): assumes the Java native method declares `double beta` -
 * confirm against the CBLAS Java class.
 *
 * NOTE(review): GetDirectBufferAddress returns NULL for non-direct buffers;
 * this binding presumably relies on the Java side to pass direct buffers only.
 */
JNIEXPORT void JNICALL Java_uncomplicate_neanderthal_CBLAS_dsyrk
(JNIEnv *env, jclass clazz,
 jint Order, jint Uplo, jint Trans,
 jint N, jint K,
 jdouble alpha, jobject A, jint offsetA, jint lda,
 jdouble beta, jobject C, jint offsetC, jint ldc) {

    double *cA = (double *) (*env)->GetDirectBufferAddress(env, A);
    double *cC = (double *) (*env)->GetDirectBufferAddress(env, C);

    cblas_dsyrk(Order, Uplo, Trans, N, K,
                alpha, cA + offsetA, lda,
                beta, cC + offsetC, ldc);
}
/*------------------------------------------------------------------- * Check the orthogonality of Q */ static int check_orthogonality(int M, int N, double *Q, int LDQ, double eps) { double alpha = 1.0; double beta = -1.0; double normQ, result; int info_ortho; int minMN = min(M, N); double *work = (double *)malloc(minMN*sizeof(double)); /* Build the idendity matrix */ double *Id = (double *) malloc(minMN*minMN*sizeof(double)); LAPACKE_dlaset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN); /* Perform Id - Q'Q */ if (M >= N) cblas_dsyrk(CblasColMajor, CblasUpper, CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N); else cblas_dsyrk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M); normQ = LAPACKE_dlansy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 'U', minMN, Id, minMN, work); result = normQ / (minMN * eps); printf("============\n"); printf("Checking the orthogonality of Q \n"); printf("||Id-Q'*Q||_oo / (minMN*eps) = %e \n", result); if ( isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Orthogonality is suspicious ! \n"); info_ortho=1; } else { printf("-- Orthogonality is CORRECT ! \n"); info_ortho=0; } free(work); free(Id); return info_ortho; }
/*
 * Compare a PLASMA DSYRK result against the reference CBLAS DSYRK.
 *
 *   uplo, trans, N, K, alpha, A, LDA, beta   arguments of the update under test
 *   Cref      on entry the initial C; overwritten, first with the reference
 *             result and then with (reference - Cplasma)
 *   Cplasma   result produced by the routine under test
 *   LDC       leading dimension of Cref and Cplasma
 *
 * The residual infinity norm is scaled by (||A|| + ||C_initial||) * N * eps.
 * Returns 0 when that ratio is finite and at most 10 and both result norms
 * are finite; returns 1 otherwise, including when the workspace cannot be
 * allocated.
 */
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K,
                          double alpha, double *A, int LDA,
                          double beta, double *Cref, double *Cplasma, int LDC)
{
    int info_solution;
    double Anorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm;
    double eps;
    double beta_const;
    double result;

    /* Workspace for the infinity-norm computations below. */
    double *work = (double *)malloc(max(N, K)* sizeof(double));
    if (work == NULL) {
        fprintf(stderr, "check_solution: unable to allocate workspace\n");
        return 1;
    }

    beta_const = -1.0;

    /* A is N x K when not transposed, K x N otherwise. */
    Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm),
                                (trans == PlasmaNoTrans) ? N : K,
                                (trans == PlasmaNoTrans) ? K : N, A, LDA, work);
    Cinitnorm   = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref,    LDC, work);
    Cplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work);

    /* Reference result, computed in place in Cref. */
    cblas_dsyrk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
                N, K, (alpha), A, LDA, (beta), Cref, LDC);

    Clapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);

    /* Cref := Cref - Cplasma over the whole LDC x N storage. */
    cblas_daxpy(LDC*N, (beta_const), Cplasma, 1, Cref, 1);

    Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);

    eps = LAPACKE_dlamch_work('e');

    printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",
           Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm);

    result = Rnorm / ((Anorm + Cinitnorm) * N * eps);

    printf("============\n");
    printf("Checking the norm of the difference against reference DSYRK \n");
    printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result);

    if ( isinf(Clapacknorm) || isinf(Cplasmanorm) ||
         isnan(result) || isinf(result) || (result > 10.0) ) {
        printf("-- The solution is suspicious ! \n");
        info_solution = 1;
    }
    else {
        printf("-- The solution is CORRECT ! \n");
        info_solution= 0 ;
    }

    free(work);

    return info_solution;
}
int main(int argc, char **argv) { std::string interleaving; if (argc > 1) { interleaving = argv[1]; } else { interleaving = ""; } FILE *f = fopen("syrk.csv","a"); int n = 1024; double *C = (double*) malloc(n * n * sizeof(double)); double *A = (double*) malloc(n * n * sizeof(double)); double *C2 = (double*) malloc(n * n * sizeof(double)); double *A2 = (double*) malloc(n * n * sizeof(double)); initialize(C, A, C2, A2, n); SyrkProblem* problem = new SyrkProblem(C, A, n, n, n); struct timeval start, end; gettimeofday(&start, NULL); Framework::solve(problem, interleaving); gettimeofday(&end, NULL); double seconds = (end.tv_sec - start.tv_sec) + 1.0e-6 * (end.tv_usec - start.tv_usec); #ifdef DEBUG fprintf(f,"SYRK,%d,%s,%f,%ld,%ld\n", n, interleaving.c_str(), seconds, Memory::getMax(), Memory::getTotal()); printf("SYRK,%d,%s,%f,%ld,%ld\n", n, interleaving.c_str(), seconds, Memory::getMax(), Memory::getTotal()); #else fprintf(f,"SYRK,%d,%s,%f\n", n, interleaving.c_str(), seconds); printf("SYRK,%d,%s,%f\n", n, interleaving.c_str(), seconds); #endif // Correctness cblas_dsyrk(CblasColMajor, CblasLower, CblasNoTrans, n, n, -1.0, A2, n, 1.0, C2, n); for(int i = 0; i < n*n; i++) { if ((fabs(C[i] - C2[i]) / C[i]) > .0000000001) { printf("ERROR: %f\n", fabs((C[i] - C2[i]) / C[i])); exit(EXIT_FAILURE); } } printf("test passed\n"); free(C); free(A); free(C2); free(A2); delete problem; fclose(f); }
// This task performs a symmetric rank-k update, which is the // third step in the tiled Cholesky factorization ocrGuid_t cblas_dsyrk_task ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { u64 *func_args = paramv; u32 k = (u32) func_args[0]; u32 j = (u32) func_args[1]; u32 i = (u32) func_args[2]; u32 tileSize = (u32) func_args[3]; ocrGuid_t out_lkji_jjkp1_event_guid = (ocrGuid_t) func_args[4]; // PRINTF("RUNNING update_diagonal (%d, %d, %d)\n", k, j, i); double* aBlock = (double*) (depv[0].ptr); double* l2Block = (double*) (depv[1].ptr); // Symmetric Rank-k Update C = alpha AA' + beta C, where alpha = -1, beta = 1 cblas_dsyrk(CblasRowMajor, CblasLower, CblasNoTrans, tileSize, tileSize, -1.0, l2Block, tileSize, 1.0, // A matrix aBlock, tileSize); // C matrix, solution put here in lower triangle ocrEventSatisfy(out_lkji_jjkp1_event_guid, depv[0].guid); return NULL_GUID; }
// Base case: apply the update C := C - A * A' with a single cblas_dsyrk
// call (column-major, lower triangle, no transpose), using the problem's
// n for both the order and the inner dimension, and lda / ldc as the
// leading dimensions.
void SyrkProblem::runBaseCase() {
  const double alpha = -1.0;
  const double beta = 1.0;

  cblas_dsyrk(CblasColMajor, CblasLower, CblasNoTrans,
              n, n,
              alpha, A, lda,
              beta, C, ldc);
}
/*
 * Pass-through wrapper around cblas_dsyrk.
 *
 * Every argument is handed to cblas_dsyrk unchanged and in the same order;
 * C is updated in place.
 */
void wrapper_cblas_dsyrk(const enum CBLAS_ORDER Order,
                         const enum CBLAS_UPLO Uplo,
                         const enum CBLAS_TRANSPOSE Trans,
                         const int N, const int K,
                         const double alpha, const double *A, const int lda,
                         const double beta, double *C, const int ldc)
{
    cblas_dsyrk(Order, Uplo, Trans,
                N, K,
                alpha, A, lda,
                beta, C, ldc);
}
// Double-precision `herk` overload: for real data the update is forwarded
// to cblas_dsyrk.
//
// The CBLAS order / uplo / trans options are taken from the *types*
// Order, UpLo and Trans through cblas_option<...>::value; the values of
// the `order`, `uplo` and `trans` arguments themselves are not read.
// NOTE(review): Order, UpLo and Trans are presumably template parameters
// declared on a template header outside this excerpt - confirm.
inline void herk( const Order order, const UpLo uplo, const Trans trans,
        const int n, const int k,
        const double alpha, const double* a, const int lda,
        const double beta, double* c, const int ldc ) {
    cblas_dsyrk( cblas_option< Order >::value,
            cblas_option< UpLo >::value,
            cblas_option< Trans >::value,
            n, k,
            alpha, a, lda,
            beta, c, ldc );
}
