void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TA, const enum CBLAS_TRANSPOSE TB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) { int info=2000; #ifndef NoCblasErrorChecks if (M < 0) info = cblas_errprn(4, info, "M cannot be less than zero 0,; is set to %d.", M); if (N < 0) info = cblas_errprn(5, info, "N cannot be less than zero 0,; is set to %d.", N); if (K < 0) info = cblas_errprn(6, info, "K cannot be less than zero 0,; is set to %d.", K); if (Order == CblasRowMajor) { if (TA == CblasNoTrans) { if ( (lda < K) || (lda < 1) ) info = cblas_errprn(9, info, "lda must be >= MAX(K,1): lda=%d K=%d", lda, K); } else { if (TA != CblasConjTrans && TA != CblasTrans) info = cblas_errprn(2, info, "TransA must be %d, %d or %d, but is set to %d", CblasNoTrans, CblasTrans, CblasConjTrans, TA); if ( (lda < M) || (lda < 1) ) info = cblas_errprn(9, info, "lda must be >= MAX(M,1): lda=%d M=%d", lda, M); } if (TB == CblasNoTrans) { if ( (ldb < N) || (ldb < 1) ) info = cblas_errprn(11, info,"ldb must be >= MAX(N,1): ldb=%d N=%d", ldb, N); } else { if (TB != CblasConjTrans && TB != CblasTrans) info = cblas_errprn(3, info, "TransB must be %d, %d or %d, but is set to %d", CblasNoTrans, CblasTrans, CblasConjTrans, TB); if ( (ldb < K) || (ldb < 1) ) info = cblas_errprn(11, info,"ldb must be >= MAX(N,1): ldb=%d K=%d", ldb, K); } if ( (ldc < N) || (ldc < 1) ) info = cblas_errprn(14, info,"ldc must be >= MAX(N,1): ldc=%d N=%d", ldc, N); } else if (Order == CblasColMajor) { if (TA == CblasNoTrans) { if ( (lda < M) || (lda < 1) ) info = cblas_errprn(9, info, "lda must be >= MAX(M,1): lda=%d M=%d", lda, M); } else { if (TA != CblasConjTrans && TA != CblasTrans) info = cblas_errprn(2, info, "TransA must be %d, %d or %d, but is set to %d", CblasNoTrans, CblasTrans, CblasConjTrans, TA); if ( (lda < K) || (lda < 1) ) info = cblas_errprn(9, info, "lda must be >= MAX(K,1): lda=%d K=%d", lda, K); } if (TB == CblasNoTrans) { if ( (ldb < K) || (ldb < 1) ) info = cblas_errprn(11,info, "ldb must be >= MAX(K,1): ldb=%d K=%d", ldb, K); } else { if (TB != CblasConjTrans && TB != CblasTrans) info = cblas_errprn(3, info, "TransB must be %d, %d or %d, but is set to %d", CblasNoTrans, CblasTrans, CblasConjTrans, TB); if ( (ldb < N) || (ldb < 1) ) info = cblas_errprn(11,info, "ldb must be >= MAX(K,1): ldb=%d K=%d", ldb, K); } if ( (ldc < M) || (ldc < 1) ) info = cblas_errprn(14, info,"ldc must be >= MAX(M,1): ldc=%d M=%d", ldc, M); } else info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d", CblasRowMajor, CblasColMajor, Order); if (info != 2000) { cblas_xerbla(info, "cblas_dgemm", ""); return; } #endif if (Order == CblasColMajor) ATL_dgemm(TA, TB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); else ATL_dgemm(TB, TA, N, M, K, alpha, B, ldb, A, lda, beta, C, ldc); }
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TA, const enum CBLAS_TRANSPOSE TB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) { int info=2000; #ifndef NoCblasErrorChecks if (M < 0) info = cblas_errprn(4, info, "M cannot be less than zero 0,; is set to %d.", M); if (N < 0) info = cblas_errprn(5, info, "N cannot be less than zero 0,; is set to %d.", N); if (K < 0) info = cblas_errprn(6, info, "K cannot be less than zero 0,; is set to %d.", K); if (Order == CblasRowMajor) { if (TA == CblasNoTrans) { if ( (lda < K) || (lda < 1) ) info = cblas_errprn(9, info, "lda must be >= MAX(K,1): lda=%d K=%d", lda, K); } else { if (TA != CblasConjTrans && TA != CblasTrans) info = cblas_errprn(2, info, "TransA must be %d, %d or %d, but is set to %d", CblasNoTrans, CblasTrans, CblasConjTrans, TA); if ( (lda < M) || (lda < 1) ) info = cblas_errprn(9, info, "lda must be >= MAX(M,1): lda=%d M=%d", lda, M); } if (TB == CblasNoTrans) { if ( (ldb < N) || (ldb < 1) ) info = cblas_errprn(11, info,"ldb must be >= MAX(N,1): ldb=%d N=%d", ldb, N); } else { if (TB != CblasConjTrans && TB != CblasTrans) info = cblas_errprn(3, info, "TransB must be %d, %d or %d, but is set to %d", CblasNoTrans, CblasTrans, CblasConjTrans, TB); if ( (ldb < K) || (ldb < 1) ) info = cblas_errprn(11, info,"ldb must be >= MAX(K,1): ldb=%d K=%d", ldb, K); } if ( (ldc < N) || (ldc < 1) ) info = cblas_errprn(14, info,"ldc must be >= MAX(N,1): ldc=%d N=%d", ldc, N); } else if (Order == CblasColMajor) { if (TA == CblasNoTrans) { if ( (lda < M) || (lda < 1) ) info = cblas_errprn(9, info, "lda must be >= MAX(M,1): lda=%d M=%d", lda, M); } else { if (TA != CblasConjTrans && TA != CblasTrans) info = cblas_errprn(2, info, "TransA must be %d, %d or %d, but is set to %d", CblasNoTrans, CblasTrans, CblasConjTrans, TA); if ( (lda < K) || (lda < 1) ) info = cblas_errprn(9, info, "lda must be >= MAX(K,1): lda=%d K=%d", lda, K); } if (TB == CblasNoTrans) { if ( (ldb < K) || (ldb < 1) ) info = cblas_errprn(11,info, "ldb must be >= MAX(K,1): ldb=%d K=%d", ldb, K); } else { if (TB != CblasConjTrans && TB != CblasTrans) info = cblas_errprn(3, info, "TransB must be %d, %d or %d, but is set to %d", CblasNoTrans, CblasTrans, CblasConjTrans, TB); if ( (ldb < N) || (ldb < 1) ) info = cblas_errprn(11,info, "ldb must be >= MAX(K,1): ldb=%d K=%d", ldb, K); } if ( (ldc < M) || (ldc < 1) ) info = cblas_errprn(14, info,"ldc must be >= MAX(M,1): ldc=%d M=%d", ldc, M); } else info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d", CblasRowMajor, CblasColMajor, Order); if (info != 2000) { cblas_xerbla(info, "cblas_dgemm", ""); return; } #endif /* * Call SYRK when that's what the user is actually asking for; just handle * beta=0, because beta=X requires we copy C and then subtract to preserve * asymmetry */ if (A == B && M == N && TA != TB && lda == ldb && beta == 0.0) { ATL_dsyrk(CblasUpper, (Order == CblasColMajor) ? TA : TB, N, K, alpha, A, lda, beta, C, ldc); ATL_dsyreflect(CblasUpper, N, C, ldc); return; } if (Order == CblasColMajor) ATL_dgemm(TA, TB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); else ATL_dgemm(TB, TA, N, M, K, alpha, B, ldb, A, lda, beta, C, ldc); }
int main(int argc, char** argv) { signal(SIGINT, INThandler); double *dA = (double*) malloc( sizeof(double) * max_entries ); double *dB = (double*) malloc( sizeof(double) * max_entries ); double *dC1 = (double*) malloc( sizeof(double) * max_entries ); double *dC2 = (double*) malloc( sizeof(double) * max_entries ); double *dC3 = (double*) malloc( sizeof(double) * max_entries ); double *dCo = (double*) malloc( sizeof(double) * max_entries ); double dAlpha = 1.3; double dBeta = 3.7; srand(time(0)); MSG("Starting Test.\n"); MSG("Generate Random Array."); int c; int p = (int)pow(2, max_exp); for (c=0; c<p; c++) dA[c] = (double)(rand()%500/100.0); for (c=0; c<p; c++) dB[c] = (double)(rand()%500/100.0); for (c=0; c<p; c++) dC1[c] = (double)(rand()%500/100.0); memcpy(dC2, dC1, p*sizeof(double)); memcpy(dC3, dC1, p*sizeof(double)); memcpy(dCo, dC1, p*sizeof(double)); MSG("Done."); MSG("ATL_dgemm"); ATL_dgemm((CBLAS_TRANSPOSE)liftracc_no_trans, (CBLAS_TRANSPOSE)liftracc_no_trans, p, p, p, dAlpha, dA, p, dB, p, dBeta, dC1, p); MSG("end"); MSG("cblas_dgemm"); inner_cblas_dgemm((CBLAS_ORDER)liftracc_col_major, (CBLAS_TRANSPOSE)liftracc_no_trans, (CBLAS_TRANSPOSE)liftracc_no_trans, p, p, p, dAlpha, dA, p, dB, p, dBeta, dC2, p); MSG("end"); MSG("goto2_dgemm"); void *goto2_handle = 0; if (!goto2_handle) goto2_handle = dlopen("libgoto2.so", RTLD_LAZY); void (*goto2_dgemm)(const liftracc_order_t order, const liftracc_transpose_t transa, const liftracc_transpose_t transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc); *(void **) (&goto2_dgemm) = dlsym(goto2_handle, "cblas_dgemm"); (*goto2_dgemm)(liftracc_col_major, liftracc_no_trans, liftracc_no_trans, p, p, p, dAlpha, dA, p, dB, p, dBeta, dC3, p); MSG("end"); /* MSG("clearspeed_dgemm"); void *clear_handle = 0; if (!clear_handle) clear_handle = dlopen("libcsxl_mkl.so", RTLD_LAZY); void (*clear_dgemm)( *(void **) (&clear_dgemm) = dlsym(handle, liftracc_clear_function_names[i]); MSG("end"); */ for (c=0; c<p; c++) { if (round(dC1[c]*1000)/1000 != round(dC2[c]*1000)/1000) { MSG("ERROR dCo[%d] = %f :: dC1[%d] = %f != dC2[%d] = %f -- dC3[%d] = %f", c, dCo[c], c, dC1[c], c, dC2[c], c, dC3[c]); } } MSG("Finish."); return error_count; }