/*
 * Vector copy entry point.
 *
 * Built as the Fortran-style NAME (every argument passed by reference) when
 * CBLAS is not defined, and as the C-style CNAME (scalars passed by value)
 * otherwise.  Both variants share the body that follows the conditional and
 * hand the work to COPY_K(n, x, incx, y, incy).
 *
 *   n / N          element count; the call is a no-op when it is <= 0
 *   x, incx / INCX first vector and its stride (presumably the source)
 *   y, incy / INCY second vector and its stride (presumably the destination)
 */
#ifndef CBLAS

void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){

  BLASLONG n    = *N;
  BLASLONG incx = *INCX;
  BLASLONG incy = *INCY;

  PRINT_DEBUG_NAME;

#else

void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){

  PRINT_DEBUG_CNAME;

#endif

  if (n <= 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |inc| elements,
     each element being COMPSIZE FLOATs wide. */
  if (incx < 0) x -= (n - 1) * incx * COMPSIZE;
  if (incy < 0) y -= (n - 1) * incy * COMPSIZE;

  COPY_K(n, x, incx, y, incy);

  FUNCTION_PROFILE_END(COMPSIZE, COMPSIZE * n, 0);

  IDEBUG_END;

  return;
}
/*
 * AXPBY entry point: forwards to AXPBY_K(n, alpha, x, incx, beta, y, incy)
 * (presumably y := alpha * x + beta * y; the arithmetic lives in the kernel).
 *
 * Built as the Fortran-style NAME (arguments by reference) when CBLAS is not
 * defined, and as the C-style CNAME (scalars by value) otherwise.
 *
 *   n / N            element count; the call is a no-op when it is <= 0
 *   alpha, beta      scalar factors passed through to the kernel
 *   x, incx / INCX   first vector and its stride
 *   y, incy / INCY   second vector and its stride
 */
#ifndef CBLAS

void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY)
{

  BLASLONG n    = *N;
  BLASLONG incx = *INCX;
  BLASLONG incy = *INCY;
  FLOAT alpha   = *ALPHA;
  FLOAT beta    = *BETA;

#else

void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT beta, FLOAT *y, blasint incy)
{

#endif

  if (n <= 0) return;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |inc| elements. */
  if (incx < 0) x -= (n - 1) * incx;
  if (incy < 0) y -= (n - 1) * incy;

  AXPBY_K(n, alpha, x, incx, beta, y, incy);

  FUNCTION_PROFILE_END(1, 2 * n, 2 * n);

  return;
}
/*
 * Fortran-style entry point: reads N, INCX and INCY through their pointers
 * and returns, as a double, whatever DSDOT_K computes for the two float
 * vectors (presumably their dot product accumulated in double precision --
 * the arithmetic itself lives in the kernel).
 *
 * Returns 0 without calling the kernel when *N <= 0.
 */
double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){

  BLASLONG len      = *N;
  BLASLONG stride_x = *INCX;
  BLASLONG stride_y = *INCY;
  double   dot      = 0.0;

  PRINT_DEBUG_NAME;

  if (len <= 0) return 0;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (len - 1) * |stride| elements. */
  if (stride_x < 0) x += (1 - len) * stride_x;
  if (stride_y < 0) y += (1 - len) * stride_y;

  dot = DSDOT_K(len, x, stride_x, y, stride_y);

  FUNCTION_PROFILE_END(1, len, len);

  IDEBUG_END;

  return dot;
}
/*
 * C-style entry point that returns the value produced by
 * NRM2_K(n, x, incx) (presumably the Euclidean norm of x).
 *
 *   n     element count; 0 is returned immediately when n <= 0
 *   x     vector data
 *   incx  stride, forwarded unchanged to the kernel
 */
FLOAT CNAME(blasint n, FLOAT *x, blasint incx){

  FLOAT norm;

  PRINT_DEBUG_CNAME;

  if (n < 1) return 0.;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  norm = NRM2_K(n, x, incx);

  FUNCTION_PROFILE_END(COMPSIZE, n, 2 * n);

  IDEBUG_END;

  return norm;
}
/*
 * Fortran-style entry point that returns the value produced by
 * NRM2_K (presumably the Euclidean norm of x), cast to FLOATRET.
 *
 *   N     pointer to the element count; 0 is returned when *N <= 0
 *   x     vector data
 *   INCX  pointer to the stride, forwarded to the kernel
 */
FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){

  BLASLONG len    = *N;
  BLASLONG stride = *INCX;
  FLOATRET norm;

  PRINT_DEBUG_NAME;

  if (len < 1) return 0.;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  norm = (FLOATRET)NRM2_K(len, x, stride);

  FUNCTION_PROFILE_END(COMPSIZE, len, 2 * len);

  IDEBUG_END;

  return norm;
}
/*
 * C-style entry point: returns, as a double, whatever DSDOT_K computes for
 * the two float vectors (presumably their dot product accumulated in double
 * precision -- the arithmetic itself lives in the kernel).
 *
 * Returns 0 without calling the kernel when n <= 0.
 */
double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){

  double dot = 0.0;

  PRINT_DEBUG_CNAME;

  if (n <= 0) return 0;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |inc| elements. */
  if (incx < 0) {
    x -= (n - 1) * incx;
  }
  if (incy < 0) {
    y -= (n - 1) * incy;
  }

  dot = DSDOT_K(n, x, incx, y, incy);

  FUNCTION_PROFILE_END(1, n, n);

  IDEBUG_END;

  return dot;
}
/*
 * C-style index search: asks MAX_K(n, x, incx) for a position, clamps it to
 * n, and then decrements any non-zero result by one (presumably converting
 * the kernel's 1-based position into a 0-based index).
 *
 * Returns 0 when n <= 0.
 */
CBLAS_INDEX CNAME(blasint n, FLOAT *x, blasint incx){

  CBLAS_INDEX pos;

  PRINT_DEBUG_CNAME;

  if (n <= 0) return 0;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  pos = MAX_K(n, x, incx);

  /* Never report a position beyond the vector length. */
  if (pos > n) {
    pos = n;
  }

  /* A result of zero is passed through untouched. */
  if (pos) {
    pos --;
  }

  FUNCTION_PROFILE_END(COMPSIZE, n, 0);

  IDEBUG_END;

  return pos;
}
/*
 * Fortran-style index search: returns the position reported by
 * MAX_K(n, x, incx), cast to blasint and clamped to n.  Unlike a 0-based
 * API, the value is returned without any decrement.
 *
 * Returns 0 when *N <= 0.
 */
blasint NAME(blasint *N, FLOAT *x, blasint *INCX){

  BLASLONG n    = *N;
  BLASLONG incx = *INCX;
  blasint  pos;

  PRINT_DEBUG_NAME;

  if (n <= 0) return 0;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  pos = (blasint)MAX_K(n, x, incx);

  /* Never report a position beyond the vector length. */
  if (pos > n) {
    pos = n;
  }

  FUNCTION_PROFILE_END(COMPSIZE, n, 0);

  IDEBUG_END;

  return pos;
}
/*
 * Complex vector swap entry point (each element is two FLOATs, hence the
 * factor 2 in the pointer adjustment and the BLAS_COMPLEX mode).
 *
 * Built as the Fortran-style NAME (arguments by reference) when CBLAS is not
 * defined, and as the C-style CNAME otherwise.  The work is done by SWAP_K,
 * either directly or, in SMP builds with more than one available thread,
 * through blas_level1_thread().
 *
 *   n / N            element count; the call is a no-op when it is <= 0
 *   x, incx / INCX   first vector and its stride
 *   y, incy / INCY   second vector and its stride
 */
#ifndef CBLAS

void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){

  blasint n    = *N;
  blasint incx = *INCX;
  blasint incy = *INCY;

#else

void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){

#endif

#ifdef SMP
  int mode;
  FLOAT dummyalpha[2] = {ZERO, ZERO};
  int nthreads;
#endif

#ifndef CBLAS
  PRINT_DEBUG_NAME;
#else
  PRINT_DEBUG_CNAME;
#endif

  if (n <= 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |inc| complex
     elements (2 FLOATs each). */
  if (incx < 0) x -= (n - 1) * incx * 2;
  if (incy < 0) y -= (n - 1) * incy * 2;

#ifdef SMP
  nthreads = num_cpu_avail(1);

  /* With a zero stride every thread would touch the same element, so the
     threads would depend on each other; run single-threaded instead. */
  if (incx == 0 || incy == 0)
    nthreads = 1;

  if (nthreads == 1) {
#endif

    SWAP_K(n, 0, 0, ZERO, ZERO, x, incx, y, incy, NULL, 0);

#ifdef SMP
  } else {

#ifdef XDOUBLE
    mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
#elif defined(DOUBLE)
    mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
#else
    mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
#endif

    /* dummyalpha only fills the scalar slot of the generic level-1 driver. */
    blas_level1_thread(mode, n, 0, 0, dummyalpha,
                       x, incx, y, incy, NULL, 0, (void *)SWAP_K, nthreads);

  }
#endif

  FUNCTION_PROFILE_END(2, 2 * n, 0);

  IDEBUG_END;

  return;
}
/*
 * Real general matrix-vector multiply entry point (dispatches to GEMV_N or
 * GEMV_T; presumably y := alpha * op(A) * x + beta * y).
 *
 * Built as the Fortran-style NAME (arguments by reference, TRANS as a
 * character) when CBLAS is not defined, and as the C-style CNAME (order and
 * transpose enums, scalars by value) otherwise.
 *
 * Argument checking: the checks are written so that a later assignment
 * overrides an earlier one, which makes the lowest-numbered bad argument win.
 * On failure xerbla is called with that argument number and the function
 * returns without touching y.
 *
 * In the CBLAS variant, row-major input is handled by swapping m and n and
 * inverting the transpose selector before the shared body runs.
 */
#ifndef CBLAS

void NAME(char *TRANS, blasint *M, blasint *N,
          FLOAT *ALPHA, FLOAT *a, blasint *LDA,
          FLOAT *x, blasint *INCX,
          FLOAT *BETA, FLOAT *y, blasint *INCY){

  char trans = *TRANS;
  blasint m = *M;
  blasint n = *N;
  blasint lda = *LDA;
  blasint incx = *INCX;
  blasint incy = *INCY;
  FLOAT alpha = *ALPHA;
  FLOAT beta  = *BETA;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    GEMV_N, GEMV_T,
  };

  blasint info;
  blasint lenx, leny;
  blasint i;

  PRINT_DEBUG_NAME;

  TOUPPER(trans);

  info = 0;

  /* Map the transpose character onto an index into gemv[]; 'R' and 'C' are
     treated like 'N' and 'T' respectively. */
  i = -1;

  if (trans == 'N') i = 0;
  if (trans == 'T') i = 1;
  if (trans == 'R') i = 0;
  if (trans == 'C') i = 1;

  if (incy == 0)        info = 11;
  if (incx == 0)        info = 8;
  if (lda < MAX(1, m))  info = 6;
  if (n < 0)            info = 3;
  if (m < 0)            info = 2;
  if (i < 0)            info = 1;

  trans = i;

  if (info != 0){
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order,
           enum CBLAS_TRANSPOSE TransA,
           blasint m, blasint n,
           FLOAT alpha,
           FLOAT *a, blasint lda,
           FLOAT *x, blasint incx,
           FLOAT beta,
           FLOAT *y, blasint incy){

  FLOAT *buffer;
  blasint lenx, leny;
  int trans;
  blasint info, t;
#ifdef SMP
  int nthreads;
#endif

  int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    GEMV_N, GEMV_T,
  };

  PRINT_DEBUG_CNAME;

  trans = -1;
  info  =  0;   /* stays 0 (and is reported) when order is neither value below */

  if (order == CblasColMajor) {
    if (TransA == CblasNoTrans)     trans = 0;
    if (TransA == CblasTrans)       trans = 1;
    if (TransA == CblasConjNoTrans) trans = 0;
    if (TransA == CblasConjTrans)   trans = 1;

    info = -1;

    if (incy == 0)        info = 11;
    if (incx == 0)        info = 8;
    if (lda < MAX(1, m))  info = 6;
    if (n < 0)            info = 3;
    if (m < 0)            info = 2;
    if (trans < 0)        info = 1;
  }

  if (order == CblasRowMajor) {
    if (TransA == CblasNoTrans)     trans = 1;
    if (TransA == CblasTrans)       trans = 0;
    if (TransA == CblasConjNoTrans) trans = 1;
    if (TransA == CblasConjTrans)   trans = 0;

    info = -1;

    /* Swap the dimensions before validating them. */
    t = n;
    n = m;
    m = t;

    if (incy == 0)        info = 11;
    if (incx == 0)        info = 8;
    if (lda < MAX(1, m))  info = 6;
    if (n < 0)            info = 3;
    if (m < 0)            info = 2;
    if (trans < 0)        info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if ((m==0) || (n==0)) return;

  /* Vector lengths depend on whether the transposed kernel is selected. */
  lenx = n;
  leny = m;
  if (trans) lenx = m;
  if (trans) leny = n;

  /* y is scaled by beta up front; the kernel below then only accumulates. */
  if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, abs(incy), NULL, 0, NULL, 0);

  if (alpha == ZERO) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (len - 1) * |inc| elements. */
  if (incx < 0) x -= (lenx - 1) * incx;
  if (incy < 0) y -= (leny - 1) * incy;

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP

  int nthreads_max = num_cpu_avail(2);
  int nthreads_avail = nthreads_max;

  /* Small problems (by m * n) are forced onto a single thread. */
  double MNK = (double) m * (double) n;
  if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) )
    nthreads_max = 1;

  if ( nthreads_max > nthreads_avail )
    nthreads = nthreads_avail;
  else
    nthreads = nthreads_max;

  if (nthreads == 1) {
#endif

    (gemv[(int)trans])(m, n, 0, alpha, a, lda, x, incx, y, incy, buffer);

#ifdef SMP
  } else {

    (gemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, y, incy, buffer, nthreads);

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(1, m * n + m + n,  2 * m * n);

  IDEBUG_END;

  return;
}
/*
 * Real rank-1 update on a matrix passed without a leading dimension
 * (presumably packed symmetric storage, A := alpha * x * x' + A; the
 * arithmetic lives in the spr[] / spr_thread[] kernels defined elsewhere).
 *
 * Built as the Fortran-style NAME (arguments by reference, UPLO as a
 * character) when CBLAS is not defined, and as the C-style CNAME otherwise.
 * In the CBLAS variant, row-major input selects the opposite triangle.
 *
 * Argument checking: later checks override earlier ones so the
 * lowest-numbered bad argument is the one reported through xerbla.
 */
#ifndef CBLAS

void NAME(char *UPLO, blasint *N, FLOAT  *ALPHA,
          FLOAT  *x, blasint *INCX, FLOAT *a){

  char uplo_arg = *UPLO;
  blasint n     = *N;
  FLOAT alpha   = *ALPHA;
  blasint incx  = *INCX;

  blasint info;
  int uplo;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_arg);
  uplo  = -1;

  if (uplo_arg  == 'U') uplo  = 0;
  if (uplo_arg  == 'L') uplo  = 1;

  info = 0;

  if (incx == 0)  info = 5;
  if (n < 0)      info = 2;
  if (uplo  < 0)  info = 1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order,
           enum CBLAS_UPLO Uplo,
           blasint n,
           FLOAT alpha,
           FLOAT *x, blasint incx,
           FLOAT *a) {

  FLOAT *buffer;
  int uplo;
  blasint info;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_CNAME;

  uplo  = -1;
  info  =  0;   /* stays 0 (and is reported) when order is neither value below */

  if (order == CblasColMajor) {
    if (Uplo == CblasUpper) uplo  = 0;
    if (Uplo == CblasLower) uplo  = 1;

    info = -1;

    if (incx == 0)  info = 5;
    if (n < 0)      info = 2;
    if (uplo  < 0)  info = 1;
  }

  if (order == CblasRowMajor) {
    if (Uplo == CblasUpper) uplo  = 1;
    if (Uplo == CblasLower) uplo  = 0;

    info = -1;

    if (incx == 0)  info = 5;
    if (n < 0)      info = 2;
    if (uplo  < 0)  info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if (n == 0) return;

  if (alpha == ZERO) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |incx| elements. */
  if (incx < 0 ) x -= (n - 1) * incx;

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
  nthreads = num_cpu_avail(2);

  if (nthreads == 1) {
#endif

    (spr[uplo])(n, alpha, x, incx, a, buffer);

#ifdef SMP
  } else {

    (spr_thread[uplo])(n, alpha, x, incx, a, buffer, nthreads);

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n);

  IDEBUG_END;

  return;
}
/*
 * Real symmetric matrix-vector multiply entry point (dispatches to SYMV_U /
 * SYMV_L, or to SYMV_THREAD_U / SYMV_THREAD_L in SMP builds; presumably
 * y := alpha * A * x + beta * y).
 *
 * Built as the Fortran-style NAME (arguments by reference, UPLO as a
 * character) when CBLAS is not defined, and as the C-style CNAME otherwise.
 * In the CBLAS variant, row-major input selects the opposite triangle.
 *
 * Argument checking: later checks override earlier ones so the
 * lowest-numbered bad argument is the one reported through xerbla.
 */
#ifndef CBLAS

void NAME(char *UPLO, blasint *N, FLOAT  *ALPHA, FLOAT *a, blasint *LDA,
          FLOAT  *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY){

  char uplo_arg = *UPLO;
  blasint n     = *N;
  FLOAT alpha   = *ALPHA;
  blasint lda   = *LDA;
  blasint incx  = *INCX;
  FLOAT beta    = *BETA;
  blasint incy  = *INCY;

  int (*symv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    SYMV_U, SYMV_L,
  };

#ifdef SMP
  int (*symv_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
    SYMV_THREAD_U, SYMV_THREAD_L,
  };
#endif

  blasint info;
  int uplo;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_arg);
  uplo  = -1;

  if (uplo_arg  == 'U') uplo  = 0;
  if (uplo_arg  == 'L') uplo  = 1;

  info = 0;

  if (incy == 0)          info = 10;
  if (incx == 0)          info =  7;
  if (lda  < MAX(1, n))   info =  5;
  if (n < 0)              info =  2;
  if (uplo  < 0)          info =  1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order,
           enum CBLAS_UPLO Uplo,
           blasint n,
           FLOAT alpha,
           FLOAT *a, blasint lda,
           FLOAT *x, blasint incx,
           FLOAT beta,
           FLOAT *y, blasint incy) {

  FLOAT *buffer;
  int uplo;
  blasint info;
#ifdef SMP
  int nthreads;
#endif

  int (*symv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    SYMV_U, SYMV_L,
  };

#ifdef SMP
  int (*symv_thread[])(BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
    SYMV_THREAD_U, SYMV_THREAD_L,
  };
#endif

  PRINT_DEBUG_CNAME;

  uplo  = -1;
  info  =  0;   /* stays 0 (and is reported) when order is neither value below */

  if (order == CblasColMajor) {
    if (Uplo == CblasUpper) uplo  = 0;
    if (Uplo == CblasLower) uplo  = 1;

    info = -1;

    if (incy == 0)          info = 10;
    if (incx == 0)          info =  7;
    if (lda  < MAX(1, n))   info =  5;
    if (n < 0)              info =  2;
    if (uplo  < 0)          info =  1;
  }

  if (order == CblasRowMajor) {
    if (Uplo == CblasUpper) uplo  = 1;
    if (Uplo == CblasLower) uplo  = 0;

    info = -1;

    if (incy == 0)          info = 10;
    if (incx == 0)          info =  7;
    if (lda  < MAX(1, n))   info =  5;
    if (n < 0)              info =  2;
    if (uplo  < 0)          info =  1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if (n == 0) return;

  /* y is scaled by beta up front; the kernel below then only accumulates. */
  if (beta != ONE) SCAL_K(n, 0, 0, beta, y, abs(incy), NULL, 0, NULL, 0);

  if (alpha == ZERO) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |inc| elements. */
  if (incx < 0 ) x -= (n - 1) * incx;
  if (incy < 0 ) y -= (n - 1) * incy;

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
  nthreads = num_cpu_avail(2);

  if (nthreads == 1) {
#endif

    (symv[uplo])(n, n, alpha, a, lda, x, incx, y, incy, buffer);

#ifdef SMP
  } else {

    (symv_thread[uplo])(n, alpha, a, lda, x, incx, y, incy, buffer, nthreads);

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(1, n * n / 2 + 2 * n, 2 * n * n);

  IDEBUG_END;

  return;
}
/*
 * Fortran-style complex rank-1 update on a matrix passed without a leading
 * dimension (presumably packed storage; the arithmetic lives in the spr[] /
 * spr_thread[] kernels defined elsewhere).
 *
 *   UPLO   'U' or 'L' (case-insensitive via TOUPPER); anything else -> info 1
 *   N      order; negative -> info 2, zero -> immediate return
 *   ALPHA  complex scalar as {real, imag}; a zero alpha returns immediately
 *   x      vector data, INCX its stride; a zero stride -> info 5
 *   a      matrix data, updated by the kernel
 *
 * On a bad argument xerbla is called with the lowest offending argument
 * number and nothing else happens.
 */
void NAME(char *UPLO, blasint *N, FLOAT  *ALPHA,
          FLOAT  *x, blasint *INCX, FLOAT *a){

  char    uplo_ch = *UPLO;
  blasint n       = *N;
  FLOAT   alpha_r = ALPHA[0];
  FLOAT   alpha_i = ALPHA[1];
  blasint incx    = *INCX;

  blasint info;
  int uplo;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_ch);

  /* Translate the triangle selector into an index into the kernel tables. */
  switch (uplo_ch) {
  case 'U':
    uplo = 0;
    break;
  case 'L':
    uplo = 1;
    break;
  default:
    uplo = -1;
    break;
  }

  /* Lowest-numbered bad argument wins. */
  if (uplo < 0) {
    info = 1;
  } else if (n < 0) {
    info = 2;
  } else if (incx == 0) {
    info = 5;
  } else {
    info = 0;
  }

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

  if (n == 0) return;

  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |incx|. */
  if (incx < 0) {
    x -= (n - 1) * incx;
  }

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
  nthreads = num_cpu_avail(2);

  if (nthreads == 1) {
#endif

    (spr[uplo])(n, alpha_r, alpha_i, x, incx, a, buffer);

#ifdef SMP
  } else {

    /* The threaded driver takes the scalar by pointer. */
    (spr_thread[uplo])(n, ALPHA, x, incx, a, buffer, nthreads);

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n);

  IDEBUG_END;

  return;
}
/*
 * Real AXPY entry point: forwards to AXPYU_K (presumably y := alpha * x + y),
 * either directly or, in SMP builds, through blas_level1_thread().
 *
 * Built as the Fortran-style NAME (arguments by reference) when CBLAS is not
 * defined, and as the C-style CNAME (scalars by value) otherwise.
 *
 *   n / N            element count; the call is a no-op when it is <= 0
 *   alpha / ALPHA    scalar factor; the call is a no-op when it equals ZERO
 *   x, incx / INCX   first vector and its stride
 *   y, incy / INCY   second vector and its stride
 */
#ifndef CBLAS

void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){

  BLASLONG n    = *N;
  BLASLONG incx = *INCX;
  BLASLONG incy = *INCY;
  FLOAT alpha = *ALPHA;

#else

void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint incy){

#endif

#ifdef SMP
  int mode, nthreads;
#endif

#ifndef CBLAS
  PRINT_DEBUG_NAME;
#else
  PRINT_DEBUG_CNAME;
#endif

  if (n <= 0) return;

  if (alpha == ZERO) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |inc| elements. */
  if (incx < 0) x -= (n - 1) * incx;
  if (incy < 0) y -= (n - 1) * incy;

#ifdef SMP
  nthreads = num_cpu_avail(1);

  /* Disable multi-threading when incx == 0 or incy == 0:
     in that case the threads would be dependent. */
  if (incx == 0 || incy == 0)
    nthreads = 1;

  /* Temporary work-around for the low performance seen with small input
     sizes and multiple threads. */
  if (n <= 10000)
    nthreads = 1;

  if (nthreads == 1) {
#endif

    AXPYU_K(n, 0, 0, alpha, x, incx, y, incy, NULL, 0);

#ifdef SMP
  } else {

#ifdef XDOUBLE
    mode  =  BLAS_XDOUBLE | BLAS_REAL;
#elif defined(DOUBLE)
    mode  =  BLAS_DOUBLE  | BLAS_REAL;
#else
    mode  =  BLAS_SINGLE  | BLAS_REAL;
#endif

    blas_level1_thread(mode, n, 0, 0, &alpha,
                       x, incx, y, incy, NULL, 0, (void *)AXPYU_K, nthreads);

  }
#endif

  FUNCTION_PROFILE_END(1, 2 * n, 2 * n);

  IDEBUG_END;

  return;
}
/*
 * Triangular band matrix-vector entry point (dispatches into the tbmv[] /
 * tbmv_thread[] kernel tables defined elsewhere; presumably x := op(A) * x).
 *
 * Built as the Fortran-style NAME (arguments by reference, option
 * characters) when CBLAS is not defined, and as the C-style CNAME (enums)
 * otherwise.  In the CBLAS variant, row-major input inverts both the
 * triangle and the transpose selector.
 *
 * The kernel index is (trans << 2) | (uplo << 1) | unit, where
 *   trans: 0 for N/R, 1 for T/C      uplo: 0 upper, 1 lower
 *   unit : 0 for unit diagonal, 1 for non-unit
 *
 * Argument checking: later checks override earlier ones so the
 * lowest-numbered bad argument is the one reported through xerbla.
 */
#ifndef CBLAS

void NAME(char *UPLO, char *TRANS, char *DIAG,
          blasint *N, blasint *K,
          FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){

  char uplo_arg  = *UPLO;
  char trans_arg = *TRANS;
  char diag_arg  = *DIAG;

  blasint n    = *N;
  blasint k    = *K;
  blasint lda  = *LDA;
  blasint incx = *INCX;

  blasint info;
  int uplo;
  int unit;
  int trans;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_arg);
  TOUPPER(trans_arg);
  TOUPPER(diag_arg);

  trans = -1;
  unit  = -1;
  uplo  = -1;

  if (trans_arg == 'N') trans = 0;
  if (trans_arg == 'T') trans = 1;
  if (trans_arg == 'R') trans = 0;
  if (trans_arg == 'C') trans = 1;

  if (diag_arg  == 'U') unit  = 0;
  if (diag_arg  == 'N') unit  = 1;

  if (uplo_arg  == 'U') uplo  = 0;
  if (uplo_arg  == 'L') uplo  = 1;

  info = 0;

  if (incx == 0)    info = 9;
  if (lda < k + 1)  info = 7;
  if (k < 0)        info = 5;
  if (n < 0)        info = 4;
  if (unit  < 0)    info = 3;
  if (trans < 0)    info = 2;
  if (uplo  < 0)    info = 1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
           blasint n, blasint k, FLOAT  *a, blasint lda, FLOAT  *x, blasint incx) {

  int trans, uplo, unit;
  blasint info;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_CNAME;

  unit  = -1;
  uplo  = -1;
  trans = -1;
  info  =  0;   /* stays 0 (and is reported) when order is neither value below */

  if (order == CblasColMajor) {
    if (Uplo == CblasUpper)         uplo  = 0;
    if (Uplo == CblasLower)         uplo  = 1;

    if (TransA == CblasNoTrans)     trans = 0;
    if (TransA == CblasTrans)       trans = 1;
    if (TransA == CblasConjNoTrans) trans = 0;
    if (TransA == CblasConjTrans)   trans = 1;

    if (Diag == CblasUnit)          unit  = 0;
    if (Diag == CblasNonUnit)       unit  = 1;

    info = -1;

    if (incx == 0)    info = 9;
    if (lda < k + 1)  info = 7;
    if (k < 0)        info = 5;
    if (n < 0)        info = 4;
    if (unit  < 0)    info = 3;
    if (trans < 0)    info = 2;
    if (uplo  < 0)    info = 1;
  }

  if (order == CblasRowMajor) {
    if (Uplo == CblasUpper)         uplo  = 1;
    if (Uplo == CblasLower)         uplo  = 0;

    if (TransA == CblasNoTrans)     trans = 1;
    if (TransA == CblasTrans)       trans = 0;
    if (TransA == CblasConjNoTrans) trans = 1;
    if (TransA == CblasConjTrans)   trans = 0;

    if (Diag == CblasUnit)          unit  = 0;
    if (Diag == CblasNonUnit)       unit  = 1;

    info = -1;

    if (incx == 0)    info = 9;
    if (lda < k + 1)  info = 7;
    if (k < 0)        info = 5;
    if (n < 0)        info = 4;
    if (unit  < 0)    info = 3;
    if (trans < 0)    info = 2;
    if (uplo  < 0)    info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if (n == 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |incx| elements. */
  if (incx < 0 ) x -= (n - 1) * incx;

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
  nthreads = num_cpu_avail(2);

  if (nthreads == 1) {
#endif

    (tbmv[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer);

#ifdef SMP
  } else {

    (tbmv_thread[(trans<<2) | (uplo<<1) | unit])(n, k, a, lda, x, incx, buffer, nthreads);

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(1, n * k / 2 + n, n * k);

  IDEBUG_END;

  return;
}
/*
 * Computes a real C and a complex S from the complex inputs DA and DB
 * (presumably the complex Givens-rotation generator, xROTG).
 *
 * With a = DA, b = DB, both stored as {real, imag}:
 *
 *   a == 0 :  C = 0, S = 1, DA := b
 *   a != 0 :  r  = sqrt(|a|^2 + |b|^2)
 *             C  = |a| / r
 *             S  = (a / |a|) * conj(b) / r
 *             DA := (a / |a|) * r
 *
 * DB is only read.  Two implementations are selected at compile time:
 *  - on x86 / ia64 targets the intermediates are held in long double and the
 *    formulas are evaluated directly;
 *  - elsewhere the intermediates are FLOAT and the moduli are formed with
 *    explicit scaling by the larger component.
 */
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){

#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)

  long double da_r = *(DA + 0);
  long double da_i = *(DA + 1);
  long double db_r = *(DB + 0);
  long double db_i = *(DB + 1);
  long double r;

  /* Only used as an "is a exactly zero" test at this point. */
  long double ada = fabs(da_r) + fabs(da_i);

  PRINT_DEBUG_NAME;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  if (ada == ZERO) {
    *C        = ZERO;
    *(S  + 0) = ONE;
    *(S  + 1) = ZERO;
    *(DA + 0) = db_r;
    *(DA + 1) = db_i;
  } else {
    long double alpha_r, alpha_i;

    ada = sqrt(da_r * da_r + da_i * da_i);

    r = sqrt(da_r * da_r + da_i * da_i + db_r * db_r + db_i * db_i);

    /* alpha = a / |a| */
    alpha_r = da_r / ada;
    alpha_i = da_i / ada;

    *(C + 0)  = ada / r;
    *(S + 0)  = (alpha_r * db_r + alpha_i *db_i) / r;
    *(S + 1)  = (alpha_i * db_r - alpha_r *db_i) / r;
    *(DA + 0) = alpha_r * r;
    *(DA + 1) = alpha_i * r;
  }
#else

  FLOAT da_r = *(DA + 0);
  FLOAT da_i = *(DA + 1);
  FLOAT db_r = *(DB + 0);
  FLOAT db_i = *(DB + 1);
  FLOAT r;

  /* Only used as an "is a exactly zero" test at this point. */
  FLOAT ada = fabs(da_r) + fabs(da_i);
  FLOAT adb;

  PRINT_DEBUG_NAME;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  if (ada == ZERO) {
    *C        = ZERO;
    *(S  + 0) = ONE;
    *(S  + 1) = ZERO;
    *(DA + 0) = db_r;
    *(DA + 1) = db_i;
  } else {
    FLOAT scale;
    FLOAT aa_r, aa_i, bb_r, bb_i;
    FLOAT alpha_r, alpha_i;

    /* |a| = max * sqrt(1 + (min / max)^2); max > 0 because a != 0 here. */
    aa_r = fabs(da_r);
    aa_i = fabs(da_i);

    if (aa_i > aa_r) {
      aa_r = fabs(da_i);
      aa_i = fabs(da_r);
    }

    scale = (aa_i / aa_r);
    ada = aa_r * sqrt(ONE + scale * scale);

    /* Same for |b|.  The swap must read the original components (db_*),
       not the temporaries being overwritten. */
    bb_r = fabs(db_r);
    bb_i = fabs(db_i);

    if (bb_i > bb_r) {
      bb_r = fabs(db_i);
      bb_i = fabs(db_r);
    }

    /* b may be exactly zero; avoid the 0 / 0 that would turn every output
       into NaN. */
    if (bb_r == ZERO) {
      adb = ZERO;
    } else {
      scale = (bb_i / bb_r);
      adb = bb_r * sqrt(ONE + scale * scale);
    }

    /* r = sqrt(|a|^2 + |b|^2), evaluated on components divided by
       |a| + |b| (non-zero because a != 0). */
    scale = ada + adb;

    aa_r    = da_r / scale;
    aa_i    = da_i / scale;
    bb_r    = db_r / scale;
    bb_i    = db_i / scale;

    r = scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);

    /* alpha = a / |a| */
    alpha_r = da_r / ada;
    alpha_i = da_i / ada;

    *(C + 0)  = ada / r;
    *(S + 0)  = (alpha_r * db_r + alpha_i *db_i) / r;
    *(S + 1)  = (alpha_i * db_r - alpha_r *db_i) / r;
    *(DA + 0) = alpha_r * r;
    *(DA + 1) = alpha_i * r;
  }
#endif

  FUNCTION_PROFILE_END(4, 4, 4);

  IDEBUG_END;

  return;
}
/*
 * Complex AXPY entry point: forwards to AXPYU_K, or to AXPYC_K when CONJ is
 * defined (presumably y := alpha * x + y, with x conjugated in the CONJ
 * build), either directly or through blas_level1_thread() in SMP builds.
 *
 * Built as the Fortran-style NAME (arguments by reference) when CBLAS is not
 * defined, and as the C-style CNAME otherwise.  In both variants ALPHA points
 * at {real, imag}.
 *
 *   n / N            element count; the call is a no-op when it is <= 0
 *   ALPHA            complex scalar; the call is a no-op when both parts are ZERO
 *   x, incx / INCX   first vector and its stride
 *   y, incy / INCY   second vector and its stride
 */
#ifndef CBLAS

void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){

  blasint n    = *N;
  blasint incx = *INCX;
  blasint incy = *INCY;

#else

void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint incy){

#endif

  FLOAT alpha_r = *(ALPHA + 0);
  FLOAT alpha_i = *(ALPHA + 1);

#ifdef SMP
  int mode, nthreads;
#endif

#ifndef CBLAS
  PRINT_DEBUG_NAME;
#else
  PRINT_DEBUG_CNAME;
#endif

  if (n <= 0) return;

  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |inc| complex
     elements (2 FLOATs each). */
  if (incx < 0) x -= (n - 1) * incx * 2;
  if (incy < 0) y -= (n - 1) * incy * 2;

#ifdef SMP
  nthreads = num_cpu_avail(1);

  /* With a zero stride every thread would touch the same element, so the
     threads would depend on each other; run single-threaded instead. */
  if (incx == 0 || incy == 0)
    nthreads = 1;

  if (nthreads == 1) {
#endif

#ifndef CONJ
    AXPYU_K(n, 0, 0, alpha_r, alpha_i, x, incx, y, incy, NULL, 0);
#else
    AXPYC_K(n, 0, 0, alpha_r, alpha_i, x, incx, y, incy, NULL, 0);
#endif

#ifdef SMP
  } else {

#ifdef XDOUBLE
    mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
#elif defined(DOUBLE)
    mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
#else
    mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
#endif

    blas_level1_thread(mode, n, 0, 0, ALPHA, x, incx, y, incy, NULL, 0,
#ifndef CONJ
                       (void *)AXPYU_K,
#else
                       (void *)AXPYC_K,
#endif
                       nthreads);
  }
#endif

  FUNCTION_PROFILE_END(4, 2 * n, 2 * n);

  IDEBUG_END;

  return;
}
/*
 * Real general matrix-vector multiply entry point with an optional on-stack
 * work buffer (dispatches to GEMV_N or GEMV_T; presumably
 * y := alpha * op(A) * x + beta * y).
 *
 * Built as the Fortran-style NAME (arguments by reference, TRANS as a
 * character) when CBLAS is not defined, and as the C-style CNAME (order and
 * transpose enums, scalars by value) otherwise.  In the CBLAS variant,
 * row-major input is handled by swapping m and n and inverting the transpose
 * selector.
 *
 * Argument checking: later checks override earlier ones so the
 * lowest-numbered bad argument is the one reported through xerbla.
 *
 * Work buffer: when MAX_STACK_ALLOC is defined and the requested size fits
 * within it, the buffer is a local array; otherwise it comes from
 * blas_memory_alloc(1) and is released with blas_memory_free().
 */
#ifndef CBLAS

void NAME(char *TRANS, blasint *M, blasint *N,
          FLOAT *ALPHA, FLOAT *a, blasint *LDA,
          FLOAT *x, blasint *INCX,
          FLOAT *BETA, FLOAT *y, blasint *INCY){

  char trans = *TRANS;
  blasint m = *M;
  blasint n = *N;
  blasint lda = *LDA;
  blasint incx = *INCX;
  blasint incy = *INCY;
  FLOAT alpha = *ALPHA;
  FLOAT beta  = *BETA;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
  int nthreads_max;
  int nthreads_avail;
  double MNK;
#endif

  int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    GEMV_N, GEMV_T,
  };

  blasint info;
  blasint lenx, leny;
  blasint i;

  PRINT_DEBUG_NAME;

  TOUPPER(trans);

  info = 0;

  /* Map the transpose character onto an index into gemv[]; 'R' and 'C' are
     treated like 'N' and 'T' respectively. */
  i = -1;

  if (trans == 'N') i = 0;
  if (trans == 'T') i = 1;
  if (trans == 'R') i = 0;
  if (trans == 'C') i = 1;

  if (incy == 0)        info = 11;
  if (incx == 0)        info = 8;
  if (lda < MAX(1, m))  info = 6;
  if (n < 0)            info = 3;
  if (m < 0)            info = 2;
  if (i < 0)            info = 1;

  trans = i;

  if (info != 0){
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order,
           enum CBLAS_TRANSPOSE TransA,
           blasint m, blasint n,
           FLOAT alpha,
           FLOAT *a, blasint lda,
           FLOAT *x, blasint incx,
           FLOAT beta,
           FLOAT *y, blasint incy){

  FLOAT *buffer;
  blasint lenx, leny;
  int trans;
  blasint info, t;
#ifdef SMP
  int nthreads;
  int nthreads_max;
  int nthreads_avail;
  double MNK;
#endif

  int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    GEMV_N, GEMV_T,
  };

  PRINT_DEBUG_CNAME;

  trans = -1;
  info  =  0;   /* stays 0 (and is reported) when order is neither value below */

  if (order == CblasColMajor) {
    if (TransA == CblasNoTrans)     trans = 0;
    if (TransA == CblasTrans)       trans = 1;
    if (TransA == CblasConjNoTrans) trans = 0;
    if (TransA == CblasConjTrans)   trans = 1;

    info = -1;

    if (incy == 0)        info = 11;
    if (incx == 0)        info = 8;
    if (lda < MAX(1, m))  info = 6;
    if (n < 0)            info = 3;
    if (m < 0)            info = 2;
    if (trans < 0)        info = 1;
  }

  if (order == CblasRowMajor) {
    if (TransA == CblasNoTrans)     trans = 1;
    if (TransA == CblasTrans)       trans = 0;
    if (TransA == CblasConjNoTrans) trans = 1;
    if (TransA == CblasConjTrans)   trans = 0;

    info = -1;

    /* Swap the dimensions before validating them. */
    t = n;
    n = m;
    m = t;

    if (incy == 0)        info = 11;
    if (incx == 0)        info = 8;
    if (lda < MAX(1, m))  info = 6;
    if (n < 0)            info = 3;
    if (m < 0)            info = 2;
    if (trans < 0)        info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if ((m==0) || (n==0)) return;

  /* Vector lengths depend on whether the transposed kernel is selected. */
  lenx = n;
  leny = m;
  if (trans) lenx = m;
  if (trans) leny = n;

  /* y is scaled by beta up front; the kernel below then only accumulates. */
  if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, abs(incy), NULL, 0, NULL, 0);

  if (alpha == ZERO) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (len - 1) * |inc| elements. */
  if (incx < 0) x -= (lenx - 1) * incx;
  if (incy < 0) y -= (leny - 1) * incy;

#ifdef MAX_STACK_ALLOC
  /* Volatile because some gemv implementations (ex: dgemv_n.S) do not
     restore all registers. */
  volatile int stack_alloc_size = 0;

  /* For gemv_n and gemv_t, try to allocate the work buffer on the stack. */
  stack_alloc_size = m + n;
#ifdef ALIGNED_ACCESS
  stack_alloc_size += 3;
#endif
  /* dgemv_n.S requires a 128-byte buffer; rather than capping at 128 the
     size is increased by 288 bytes (ABI stack for Windows). */
  stack_alloc_size += 288 / sizeof(FLOAT) ;

  /* Too large for the stack: 0 selects the heap path below. */
  if (stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
    stack_alloc_size = 0;

  /* Sentinel for the stack overflow check performed after the kernel call. */
  volatile double stack_check = 3.14159265358979323846;

  /* A variable length array must have a size greater than zero, so reserve
     one (unused) element when the heap path is taken. */
  FLOAT stack_buffer[stack_alloc_size ? stack_alloc_size : 1];
  buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
#else
  /* Original OpenBLAS/GotoBLAS behaviour: always use the memory pool. */
  buffer = (FLOAT *)blas_memory_alloc(1);
#endif

#ifdef SMP

  nthreads_max = num_cpu_avail(2);
  nthreads_avail = nthreads_max;

  /* Small problems (by m * n) are forced onto a single thread. */
  MNK = (double) m * (double) n;
  if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) )
    nthreads_max = 1;

  if ( nthreads_max > nthreads_avail )
    nthreads = nthreads_avail;
  else
    nthreads = nthreads_max;

  if (nthreads == 1) {
#endif

    (gemv[(int)trans])(m, n, 0, alpha, a, lda, x, incx, y, incy, buffer);

#ifdef SMP
  } else {

    (gemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, y, incy, buffer, nthreads);

  }
#endif

#ifdef MAX_STACK_ALLOC
  /* Stack overflow check: the sentinel only exists in this configuration. */
  assert(stack_check==3.14159265358979323846);

  /* Only the heap-allocated buffer needs to be handed back. */
  if(!stack_alloc_size){
    blas_memory_free(buffer);
  }
#else
  blas_memory_free(buffer);
#endif

  FUNCTION_PROFILE_END(1, m * n + m + n,  2 * m * n);

  IDEBUG_END;

  return;
}
/*
 * Symmetric matrix-matrix multiply entry point (dispatches into the symm[]
 * kernel table defined elsewhere; presumably C := alpha * A * B + beta * C
 * or C := alpha * B * A + beta * C depending on the side).
 *
 * Built as the Fortran-style NAME (arguments by reference, option
 * characters) when CBLAS is not defined, and as the C-style CNAME otherwise.
 * In the CBLAS variant alpha and beta are passed by value for real builds
 * and by pointer when COMPLEX is defined; either way args.alpha / args.beta
 * end up pointing at the scalar.  Row-major input is handled by swapping m
 * and n and inverting both the side and the triangle.
 *
 * All operands are packed into a blas_arg_t.  For side == 0 the A/B pointers
 * and leading dimensions are stored as given; otherwise they are exchanged.
 *
 * Argument checking: later checks override earlier ones so the
 * lowest-numbered bad argument is the one reported through xerbla.
 *
 * The kernel index is (side << 1) | uplo, with bit 2 (value 4) selecting the
 * second half of the table on the threaded paths.
 */
#ifndef CBLAS

void NAME(char *SIDE, char *UPLO,
          blasint *M, blasint *N,
          FLOAT *alpha, FLOAT *a, blasint *ldA,
          FLOAT *b, blasint *ldB,
          FLOAT *beta,  FLOAT *c, blasint *ldC){

  char side_arg  = *SIDE;
  char uplo_arg  = *UPLO;

  blas_arg_t args;

  FLOAT *buffer;
  FLOAT *sa, *sb;

#if defined(SMP) && !defined(NO_AFFINITY)
  int nodes;
#endif

  blasint info;
  int side;
  int uplo;

  PRINT_DEBUG_NAME;

  args.alpha = (void *)alpha;
  args.beta  = (void *)beta;

  TOUPPER(side_arg);
  TOUPPER(uplo_arg);

  side  = -1;
  uplo  = -1;

  if (side_arg  == 'L') side  = 0;
  if (side_arg  == 'R') side  = 1;

  if (uplo_arg  == 'U') uplo  = 0;
  if (uplo_arg  == 'L') uplo  = 1;

  args.m = *M;
  args.n = *N;

  args.c = (void *)c;
  args.ldc = *ldC;

  info = 0;

  if (args.ldc < MAX(1, args.m)) info = 12;

  if (!side) {
    args.a = (void *)a;
    args.b = (void *)b;

    args.lda = *ldA;
    args.ldb = *ldB;

    if (args.ldb < MAX(1, args.m)) info =  9;
    if (args.lda < MAX(1, args.m)) info =  7;

  } else {
    /* Operands are exchanged for the other side. */
    args.a = (void *)b;
    args.b = (void *)a;

    args.lda = *ldB;
    args.ldb = *ldA;

    if (args.lda < MAX(1, args.m)) info =  9;
    if (args.ldb < MAX(1, args.n)) info =  7;
  }

  if (args.n   < 0)  info = 4;
  if (args.m   < 0)  info = 3;
  if (uplo     < 0)  info = 2;
  if (side     < 0)  info = 1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
           blasint m, blasint n,
#ifndef COMPLEX
           FLOAT alpha,
#else
           FLOAT *alpha,
#endif
           FLOAT *a, blasint lda,
           FLOAT *b, blasint ldb,
#ifndef COMPLEX
           FLOAT beta,
#else
           FLOAT *beta,
#endif
           FLOAT *c, blasint ldc) {

  blas_arg_t args;
  int side, uplo;
  blasint info;

  FLOAT *buffer;
  FLOAT *sa, *sb;

#if defined(SMP) && !defined(NO_AFFINITY)
  int nodes;
#endif

  PRINT_DEBUG_CNAME;

#ifndef COMPLEX
  /* Real build: the scalars arrive by value, so point at the parameters. */
  args.alpha = (void *)&alpha;
  args.beta  = (void *)&beta;
#else
  args.alpha = (void *)alpha;
  args.beta  = (void *)beta;
#endif

  args.c = (void *)c;
  args.ldc = ldc;

  side  = -1;
  uplo  = -1;
  info  =  0;   /* stays 0 (and is reported) when order is neither value below */

  if (order == CblasColMajor) {
    if (Side == CblasLeft)  side = 0;
    if (Side == CblasRight) side = 1;

    if (Uplo == CblasUpper) uplo  = 0;
    if (Uplo == CblasLower) uplo  = 1;

    info = -1;

    args.m = m;
    args.n = n;

    if (args.ldc < MAX(1, args.m)) info = 12;

    if (!side) {
      args.a = (void *)a;
      args.b = (void *)b;

      args.lda = lda;
      args.ldb = ldb;

      if (args.ldb < MAX(1, args.m)) info =  9;
      if (args.lda < MAX(1, args.m)) info =  7;

    } else {
      args.a = (void *)b;
      args.b = (void *)a;

      args.lda = ldb;
      args.ldb = lda;

      if (args.lda < MAX(1, args.m)) info =  9;
      if (args.ldb < MAX(1, args.n)) info =  7;
    }

    if (args.n   < 0)  info = 4;
    if (args.m   < 0)  info = 3;
    if (uplo     < 0)  info = 2;
    if (side     < 0)  info = 1;
  }

  if (order == CblasRowMajor) {
    if (Side == CblasLeft)  side = 1;
    if (Side == CblasRight) side = 0;

    if (Uplo == CblasUpper) uplo  = 1;
    if (Uplo == CblasLower) uplo  = 0;

    info = -1;

    /* Dimensions are exchanged for row-major input. */
    args.m = n;
    args.n = m;

    if (args.ldc < MAX(1, args.m)) info = 12;

    if (!side) {
      args.a = (void *)a;
      args.b = (void *)b;

      args.lda = lda;
      args.ldb = ldb;

      if (args.ldb < MAX(1, args.m)) info =  9;
      if (args.lda < MAX(1, args.m)) info =  7;

    } else {
      args.a = (void *)b;
      args.b = (void *)a;

      args.lda = ldb;
      args.ldb = lda;

      if (args.lda < MAX(1, args.m)) info =  9;
      if (args.ldb < MAX(1, args.n)) info =  7;
    }

    if (args.n   < 0)  info = 4;
    if (args.m   < 0)  info = 3;
    if (uplo     < 0)  info = 2;
    if (side     < 0)  info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  if (args.m == 0 || args.n == 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  buffer = (FLOAT *)blas_memory_alloc(0);

  /* Carve the two work areas out of the single allocation: sa starts
     GEMM_OFFSET_A bytes in, sb follows a GEMM_P x GEMM_Q panel rounded with
     the GEMM_ALIGN mask, plus GEMM_OFFSET_B. */
  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);

#ifdef SMP
  args.common = NULL;
  args.nthreads = num_cpu_avail(3);

  if (args.nthreads == 1) {
#endif

    (symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);

#ifdef SMP

  } else {

#ifndef NO_AFFINITY
    nodes = get_num_nodes();

    if (nodes > 1) {

      /* Split the thread budget across nodes. */
      args.nthreads /= nodes;

      gemm_thread_mn(MODE, &args, NULL, NULL,
                     symm[4 | (side << 1) | uplo ], sa, sb, nodes);

    } else {
#endif

#ifndef USE_SIMPLE_THREADED_LEVEL3

      (symm[4 | (side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);

#else

      GEMM_THREAD(MODE, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads);

#endif

#ifndef NO_AFFINITY
    }
#endif

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
                       (!side)? args.m * (args.m / 2 + args.n) : args.n * (args.m + args.n / 2),
                       (!side)? 2 * args.m * args.m * args.n : 2 * args.m * args.n * args.n);

  IDEBUG_END;

  return;
}
/*
 * Real vector swap entry point.  The work is done by SWAP_K, either directly
 * or, in SMP builds with more than one available thread, through
 * blas_level1_thread().
 *
 * Built as the Fortran-style NAME (arguments by reference) when CBLAS is not
 * defined, and as the C-style CNAME otherwise.
 *
 *   n / N            element count; the call is a no-op when it is <= 0
 *   x, incx / INCX   first vector and its stride
 *   y, incy / INCY   second vector and its stride
 */
#ifndef CBLAS

void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){

  blasint n    = *N;
  blasint incx = *INCX;
  blasint incy = *INCY;

#else

void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){

#endif

#ifdef SMP
  int mode, nthreads;
  FLOAT dummyalpha[2] = {ZERO, ZERO};
#endif

#ifndef CBLAS
  PRINT_DEBUG_NAME;
#else
  PRINT_DEBUG_CNAME;
#endif

  if (n <= 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* Negative stride: shift the base pointer by (n - 1) * |inc| elements. */
  if (incx < 0) x -= (n - 1) * incx;
  if (incy < 0) y -= (n - 1) * incy;

#ifdef SMP
  nthreads = num_cpu_avail(1);

  /* Disable multi-threading when incx == 0 or incy == 0:
     in that case the threads would be dependent. */
  if (incx == 0 || incy == 0)
    nthreads = 1;

  if (nthreads == 1) {
#endif

    SWAP_K(n, 0, 0, ZERO, x, incx, y, incy, NULL, 0);

#ifdef SMP
  } else {

#ifdef XDOUBLE
    mode  =  BLAS_XDOUBLE | BLAS_REAL;
#elif defined(DOUBLE)
    mode  =  BLAS_DOUBLE  | BLAS_REAL;
#else
    mode  =  BLAS_SINGLE  | BLAS_REAL;
#endif

    /* dummyalpha only fills the scalar slot of the generic level-1 driver. */
    blas_level1_thread(mode, n, 0, 0, dummyalpha,
                       x, incx, y, incy, NULL, 0, (void *)SWAP_K, nthreads);
  }
#endif

  FUNCTION_PROFILE_END(1, 2 * n, 0);

  IDEBUG_END;

  return;
}
void NAME(char *TRANS, blasint *M, blasint *N, blasint *KU, blasint *KL, FLOAT *ALPHA, FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY){ char trans = *TRANS; blasint m = *M; blasint n = *N; blasint ku = *KU; blasint kl = *KL; blasint lda = *LDA; blasint incx = *INCX; blasint incy = *INCY; FLOAT *buffer; #ifdef SMP int nthreads; #endif FLOAT alpha_r = ALPHA[0]; FLOAT alpha_i = ALPHA[1]; FLOAT beta_r = BETA[0]; FLOAT beta_i = BETA[1]; blasint info; blasint lenx, leny; blasint i; PRINT_DEBUG_NAME; TOUPPER(trans); info = 0; i = -1; if (trans == 'N') i = 0; if (trans == 'T') i = 1; if (trans == 'R') i = 2; if (trans == 'C') i = 3; if (trans == 'O') i = 4; if (trans == 'U') i = 5; if (trans == 'S') i = 6; if (trans == 'D') i = 7; if (incy == 0) info = 13; if (incx == 0) info = 10; if (lda < kl + ku + 1) info = 8; if (kl < 0) info = 5; if (ku < 0) info = 4; if (n < 0) info = 3; if (m < 0) info = 2; if (i < 0) info = 1; trans = i; if (info != 0){ BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } #else void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, blasint ku, blasint kl, FLOAT *ALPHA, FLOAT *a, blasint lda, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *y, blasint incy){ FLOAT alpha_r = ALPHA[0]; FLOAT alpha_i = ALPHA[1]; FLOAT beta_r = BETA[0]; FLOAT beta_i = BETA[1]; FLOAT *buffer; blasint lenx, leny; int trans; blasint info, t; #ifdef SMP int nthreads; #endif PRINT_DEBUG_CNAME; trans = -1; info = 0; if (order == CblasColMajor) { if (TransA == CblasNoTrans) trans = 0; if (TransA == CblasTrans) trans = 1; if (TransA == CblasConjNoTrans) trans = 2; if (TransA == CblasConjTrans) trans = 3; info = -1; if (incy == 0) info = 13; if (incx == 0) info = 10; if (lda < kl + ku + 1) info = 8; if (kl < 0) info = 5; if (ku < 0) info = 4; if (n < 0) info = 3; if (m < 0) info = 2; if (trans < 0) info = 1; } if (order == CblasRowMajor) { if (TransA == CblasNoTrans) trans = 1; if (TransA == 
CblasTrans) trans = 0; if (TransA == CblasConjNoTrans) trans = 3; if (TransA == CblasConjTrans) trans = 2; info = -1; t = n; n = m; m = t; t = ku; ku = kl; kl = t; if (incy == 0) info = 13; if (incx == 0) info = 10; if (lda < kl + ku + 1) info = 8; if (kl < 0) info = 5; if (ku < 0) info = 4; if (n < 0) info = 3; if (m < 0) info = 2; if (trans < 0) info = 1; } if (info >= 0) { BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } #endif if ((m==0) || (n==0)) return; lenx = n; leny = m; if (trans & 1) lenx = m; if (trans & 1) leny = n; if (beta_r != ONE || beta_i != ZERO) SCAL_K(leny, 0, 0, beta_r, beta_i, y, abs(incy), NULL, 0, NULL, 0); if (alpha_r == ZERO && alpha_i == ZERO) return; IDEBUG_START; FUNCTION_PROFILE_START(); if (incx < 0) x -= (lenx - 1) * incx * 2; if (incy < 0) y -= (leny - 1) * incy * 2; buffer = (FLOAT *)blas_memory_alloc(1); #ifdef SMP nthreads = num_cpu_avail(2); if (nthreads == 1) { #endif (gbmv[(int)trans])(m, n, kl, ku, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer); #ifdef SMP } else { (gbmv_thread[(int)trans])(m, n, kl, ku, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads); } #endif blas_memory_free(buffer); FUNCTION_PROFILE_END(4, m * n / 2 + n, m * n); IDEBUG_END; return; }
/*
 * Interface wrapper for the complex general matrix-vector kernels
 * (GEMV_N ... GEMV_D, collected in the local gemv[] table) and their threaded
 * counterparts in gemv_thread[].  Without CBLAS this builds the Fortran-style
 * entry point NAME (arguments by reference); with CBLAS it builds the C entry
 * point CNAME (scalars by value plus an `order` flag).
 *
 * Each entry point maps the transpose selector to a table index, validates the
 * arguments (reporting through xerbla; the lowest-numbered failing argument
 * wins because later checks overwrite earlier ones) and then falls into the
 * shared tail: scale y by beta, then run the selected kernel.
 */
#ifndef CBLAS

void NAME(char *TRANS, blasint *M, blasint *N,
          FLOAT *ALPHA, FLOAT *a, blasint *LDA,
          FLOAT *x, blasint *INCX,
          FLOAT *BETA, FLOAT *y, blasint *INCY){

  char trans = *TRANS;
  blasint m = *M;
  blasint n = *N;
  blasint lda = *LDA;
  blasint incx = *INCX;
  blasint incy = *INCY;

  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
                FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    GEMV_N, GEMV_T, GEMV_R, GEMV_C,
    GEMV_O, GEMV_U, GEMV_S, GEMV_D,
  };

  blasint info;
  blasint lenx, leny;
  blasint i;

  PRINT_DEBUG_NAME;

  FLOAT alpha_r = *(ALPHA + 0);
  FLOAT alpha_i = *(ALPHA + 1);

  FLOAT beta_r  = *(BETA + 0);
  FLOAT beta_i  = *(BETA + 1);

  TOUPPER(trans);

  info = 0;

  /* Map the transpose character onto the kernel table index. */
  i = -1;

  if (trans == 'N') i = 0;
  if (trans == 'T') i = 1;
  if (trans == 'R') i = 2;
  if (trans == 'C') i = 3;
  if (trans == 'O') i = 4;
  if (trans == 'U') i = 5;
  if (trans == 'S') i = 6;
  if (trans == 'D') i = 7;

  if (incy == 0) info = 11;
  if (incx == 0) info = 8;
  if (lda < MAX(1,m)) info = 6;
  if (n < 0) info = 3;
  if (m < 0) info = 2;
  if (i < 0) info = 1;

  /* From here on `trans` holds the table index, not the character. */
  trans = i;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order,
           enum CBLAS_TRANSPOSE TransA,
           blasint m, blasint n,
           FLOAT *ALPHA,
           FLOAT *a, blasint lda,
           FLOAT *x, blasint incx,
           FLOAT *BETA,
           FLOAT *y, blasint incy){

  FLOAT *buffer;
  blasint lenx, leny;
  int trans;
  blasint info, t;
#ifdef SMP
  int nthreads;
#endif

  int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
                FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    GEMV_N, GEMV_T, GEMV_R, GEMV_C,
    GEMV_O, GEMV_U, GEMV_S, GEMV_D,
  };

  PRINT_DEBUG_CNAME;

  FLOAT alpha_r = *(ALPHA + 0);
  FLOAT alpha_i = *(ALPHA + 1);

  FLOAT beta_r  = *(BETA + 0);
  FLOAT beta_i  = *(BETA + 1);

  trans = -1;
  info  =  0;

  if (order == CblasColMajor) {
    if (TransA == CblasNoTrans)     trans = 0;
    if (TransA == CblasTrans)       trans = 1;
    if (TransA == CblasConjNoTrans) trans = 2;
    if (TransA == CblasConjTrans)   trans = 3;

    /* -1 means "no error" for the CBLAS path (checked with info >= 0). */
    info = -1;

    if (incy == 0) info = 11;
    if (incx == 0) info = 8;
    if (lda < MAX(1, m)) info = 6;
    if (n < 0) info = 3;
    if (m < 0) info = 2;
    if (trans < 0) info = 1;
  }

  if (order == CblasRowMajor) {
    /* Row-major: use the opposite transpose kernel and swap m and n. */
    if (TransA == CblasNoTrans)     trans = 1;
    if (TransA == CblasTrans)       trans = 0;
    if (TransA == CblasConjNoTrans) trans = 3;
    if (TransA == CblasConjTrans)   trans = 2;

    info = -1;

    t = n;
    n = m;
    m = t;

    if (incy == 0) info = 11;
    if (incx == 0) info = 8;
    if (lda < MAX(1, m)) info = 6;
    if (n < 0) info = 3;
    if (m < 0) info = 2;
    if (trans < 0) info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  /*  Quick return if possible. */

  if (m == 0 || n == 0) return;

  /* Vector lengths depend on whether the selected kernel is a transposed one
     (odd table index). */
  lenx = n;
  leny = m;

  if (trans & 1) lenx = m;
  if (trans & 1) leny = n;

  /* y := beta * y, skipped when beta == 1 + 0i. */
  if (beta_r != ONE || beta_i != ZERO) SCAL_K(leny, 0, 0, beta_r, beta_i, y, abs(incy), NULL, 0, NULL, 0);

  if (alpha_r == ZERO && alpha_i == ZERO) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* For negative strides move the base pointer to the other end of the
     vector; the factor 2 accounts for two FLOATs per element. */
  if (incx < 0) x -= (lenx - 1) * incx * 2;
  if (incy < 0) y -= (leny - 1) * incy * 2;

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
  nthreads = num_cpu_avail(2);

  if (nthreads == 1) {
#endif

    (gemv[(int)trans])(m, n, 0, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);

#ifdef SMP
  } else {

    (gemv_thread[(int)trans])(m, n, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n);

  IDEBUG_END;

  return;
}
/*
 * GEMM interface wrapper.  Without CBLAS this builds the Fortran-style entry
 * point NAME (all arguments by reference); with CBLAS it builds the C entry
 * point CNAME (dimensions by value, alpha/beta by value for real builds and
 * by pointer for COMPLEX builds, plus an `order` flag).
 *
 * Both entry points fill a blas_arg_t, translate the transpose selectors into
 * small integer codes, validate the arguments (reporting through xerbla; the
 * lowest-numbered failing argument wins because later checks overwrite earlier
 * ones) and then share the tail that allocates the work buffer and dispatches
 * through the gemm[] table, indexed by (transb << 2) | transa.
 *
 * For row-major input the CBLAS entry point swaps the roles of A and B (and of
 * m and n) instead of transposing data.
 */
#ifndef CBLAS

void NAME(char *TRANSA, char *TRANSB,
          blasint *M, blasint *N, blasint *K,
          FLOAT *alpha,
          FLOAT *a, blasint *ldA,
          FLOAT *b, blasint *ldB,
          FLOAT *beta,
          FLOAT *c, blasint *ldC){

  blas_arg_t args;

  int transa, transb, nrowa, nrowb;
  blasint info;

  char transA, transB;
  FLOAT *buffer;
  FLOAT *sa, *sb;

#ifdef SMP
#ifndef COMPLEX
#ifdef XDOUBLE
  int mode  =  BLAS_XDOUBLE | BLAS_REAL;
#elif defined(DOUBLE)
  int mode  =  BLAS_DOUBLE  | BLAS_REAL;
#else
  int mode  =  BLAS_SINGLE  | BLAS_REAL;
#endif
#else
#ifdef XDOUBLE
  int mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
#elif defined(DOUBLE)
  int mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
#else
  int mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
#endif
#endif
#endif

#if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3)
  int nodes;
#endif

  PRINT_DEBUG_NAME;

  args.m = *M;
  args.n = *N;
  args.k = *K;

  args.a = (void *)a;
  args.b = (void *)b;
  args.c = (void *)c;

  args.lda = *ldA;
  args.ldb = *ldB;
  args.ldc = *ldC;

  args.alpha = (void *)alpha;
  args.beta  = (void *)beta;

  transA = *TRANSA;
  transB = *TRANSB;

  TOUPPER(transA);
  TOUPPER(transB);

  transa = -1;
  transb = -1;

  /* Real builds fold the conjugate selectors onto N / T. */
  if (transA == 'N') transa = 0;
  if (transA == 'T') transa = 1;
#ifndef COMPLEX
  if (transA == 'R') transa = 0;
  if (transA == 'C') transa = 1;
#else
  if (transA == 'R') transa = 2;
  if (transA == 'C') transa = 3;
#endif

  if (transB == 'N') transb = 0;
  if (transB == 'T') transb = 1;
#ifndef COMPLEX
  if (transB == 'R') transb = 0;
  if (transB == 'C') transb = 1;
#else
  if (transB == 'R') transb = 2;
  if (transB == 'C') transb = 3;
#endif

  /* Minimum leading dimensions depend on whether the operand is transposed. */
  nrowa = args.m;
  if (transa & 1) nrowa = args.k;
  nrowb = args.k;
  if (transb & 1) nrowb = args.n;

  info = 0;

  if (args.ldc < args.m) info = 13;
  if (args.ldb < nrowb)  info = 10;
  if (args.lda < nrowa)  info =  8;
  if (args.k < 0)        info =  5;
  if (args.n < 0)        info =  4;
  if (args.m < 0)        info =  3;
  if (transb < 0)        info =  2;
  if (transa < 0)        info =  1;

  if (info){
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB,
           blasint m, blasint n, blasint k,
#ifndef COMPLEX
           FLOAT alpha,
#else
           FLOAT *alpha,
#endif
           FLOAT *a, blasint lda,
           FLOAT *b, blasint ldb,
#ifndef COMPLEX
           FLOAT beta,
#else
           FLOAT *beta,
#endif
           FLOAT *c, blasint ldc) {

  blas_arg_t args;
  int transa, transb;
  blasint nrowa, nrowb, info;

  XFLOAT *buffer;
  XFLOAT *sa, *sb;

#ifdef SMP
#ifndef COMPLEX
#ifdef XDOUBLE
  int mode  =  BLAS_XDOUBLE | BLAS_REAL;
#elif defined(DOUBLE)
  int mode  =  BLAS_DOUBLE  | BLAS_REAL;
#else
  int mode  =  BLAS_SINGLE  | BLAS_REAL;
#endif
#else
#ifdef XDOUBLE
  int mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
#elif defined(DOUBLE)
  int mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
#else
  int mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
#endif
#endif
#endif

#if defined(SMP) && !defined(NO_AFFINITY) && !defined(USE_SIMPLE_THREADED_LEVEL3)
  int nodes;
#endif

  PRINT_DEBUG_CNAME;

  /* Real builds receive alpha/beta by value, so pass their addresses;
     COMPLEX builds already receive pointers. */
#ifndef COMPLEX
  args.alpha = (void *)&alpha;
  args.beta  = (void *)&beta;
#else
  args.alpha = (void *)alpha;
  args.beta  = (void *)beta;
#endif

  transa = -1;
  transb = -1;
  info   =  0;

  if (order == CblasColMajor) {
    args.m = m;
    args.n = n;
    args.k = k;

    args.a = (void *)a;
    args.b = (void *)b;
    args.c = (void *)c;

    args.lda = lda;
    args.ldb = ldb;
    args.ldc = ldc;

    if (TransA == CblasNoTrans)     transa = 0;
    if (TransA == CblasTrans)       transa = 1;
#ifndef COMPLEX
    if (TransA == CblasConjNoTrans) transa = 0;
    if (TransA == CblasConjTrans)   transa = 1;
#else
    if (TransA == CblasConjNoTrans) transa = 2;
    if (TransA == CblasConjTrans)   transa = 3;
#endif
    if (TransB == CblasNoTrans)     transb = 0;
    if (TransB == CblasTrans)       transb = 1;
#ifndef COMPLEX
    if (TransB == CblasConjNoTrans) transb = 0;
    if (TransB == CblasConjTrans)   transb = 1;
#else
    if (TransB == CblasConjNoTrans) transb = 2;
    if (TransB == CblasConjTrans)   transb = 3;
#endif

    nrowa = args.m;
    if (transa & 1) nrowa = args.k;
    nrowb = args.k;
    if (transb & 1) nrowb = args.n;

    /* -1 means "no error" for the CBLAS path (checked with info >= 0). */
    info = -1;

    if (args.ldc < args.m) info = 13;
    if (args.ldb < nrowb)  info = 10;
    if (args.lda < nrowa)  info =  8;
    if (args.k < 0)        info =  5;
    if (args.n < 0)        info =  4;
    if (args.m < 0)        info =  3;
    if (transb < 0)        info =  2;
    if (transa < 0)        info =  1;
  }

  if (order == CblasRowMajor) {
    /* Row-major: exchange A with B and m with n. */
    args.m = n;
    args.n = m;
    args.k = k;

    args.a = (void *)b;
    args.b = (void *)a;
    args.c = (void *)c;

    args.lda = ldb;
    args.ldb = lda;
    args.ldc = ldc;

    if (TransB == CblasNoTrans)     transa = 0;
    if (TransB == CblasTrans)       transa = 1;
#ifndef COMPLEX
    if (TransB == CblasConjNoTrans) transa = 0;
    if (TransB == CblasConjTrans)   transa = 1;
#else
    if (TransB == CblasConjNoTrans) transa = 2;
    if (TransB == CblasConjTrans)   transa = 3;
#endif
    if (TransA == CblasNoTrans)     transb = 0;
    if (TransA == CblasTrans)       transb = 1;
#ifndef COMPLEX
    if (TransA == CblasConjNoTrans) transb = 0;
    if (TransA == CblasConjTrans)   transb = 1;
#else
    if (TransA == CblasConjNoTrans) transb = 2;
    if (TransA == CblasConjTrans)   transb = 3;
#endif

    nrowa = args.m;
    if (transa & 1) nrowa = args.k;
    nrowb = args.k;
    if (transb & 1) nrowb = args.n;

    info = -1;

    if (args.ldc < args.m) info = 13;
    if (args.ldb < nrowb)  info = 10;
    if (args.lda < nrowa)  info =  8;
    if (args.k < 0)        info =  5;
    if (args.n < 0)        info =  4;
    if (args.m < 0)        info =  3;
    if (transb < 0)        info =  2;
    if (transa < 0)        info =  1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  /* Quick return: C has no elements. */
  if ((args.m == 0) || (args.n == 0)) return;

#if 0
  fprintf(stderr, "m = %4d  n = %d  k = %d  lda = %4d  ldb = %4d  ldc = %4d\n",
          args.m, args.n, args.k, args.lda, args.ldb, args.ldc);
#endif

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  buffer = (XFLOAT *)blas_memory_alloc(0);

  /* Carve the two packing areas sa and sb out of the single work buffer. */
  sa = (XFLOAT *)((BLASLONG)buffer +GEMM_OFFSET_A);
  sb = (XFLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);

#ifdef SMP
  mode |= (transa << BLAS_TRANSA_SHIFT);
  mode |= (transb << BLAS_TRANSB_SHIFT);

  args.common = NULL;
  args.nthreads = num_cpu_avail(3);

  if (args.nthreads == 1) {
#endif

    (gemm[(transb << 2) | transa])(&args, NULL, NULL, sa, sb, 0);

#ifdef SMP

  } else {

#ifndef USE_SIMPLE_THREADED_LEVEL3

#ifndef NO_AFFINITY
    nodes = get_num_nodes();

    if ((nodes > 1) && get_node_equal()) {

      args.nthreads /= nodes;

      gemm_thread_mn(mode, &args, NULL, NULL, gemm[16 | (transb << 2) | transa], sa, sb, nodes);

    } else {
#endif

      /* Entries at offset 16 in gemm[] are used on the multi-thread path. */
      (gemm[16 | (transb << 2) | transa])(&args, NULL, NULL, sa, sb, 0);

#else

      GEMM_THREAD(mode, &args, NULL, NULL, gemm[(transb << 2) | transa], sa, sb, args.nthreads);

#endif

#ifndef USE_SIMPLE_THREADED_LEVEL3
#ifndef NO_AFFINITY
    }
#endif
#endif

#endif

#ifdef SMP
  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.k + args.k * args.n + args.m * args.n, 2 * args.m * args.n * args.k);

  IDEBUG_END;

  return;
}
/*
 * Interface wrapper for the complex rank-2 update kernels in the her2[] /
 * her2_thread[] tables.  Without CBLAS this builds the Fortran-style entry
 * point NAME (arguments by reference); with CBLAS it builds the C entry point
 * CNAME (scalars by value plus an `order` flag).
 *
 * Kernel index: 0 = upper, 1 = lower for column-major input; row-major input
 * selects index 3 (upper) or 2 (lower).
 *
 * Arguments are validated and reported through xerbla; the lowest-numbered
 * failing argument wins because later checks overwrite earlier ones.
 */
#ifndef CBLAS

void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
          FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a, blasint *LDA){

  char uplo_arg = *UPLO;
  blasint n = *N;
  FLOAT alpha_r = ALPHA[0];
  FLOAT alpha_i = ALPHA[1];
  blasint lda = *LDA;
  blasint incx = *INCX;
  blasint incy = *INCY;

  blasint info;
  int uplo;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_arg);
  uplo = -1;

  if (uplo_arg == 'U') uplo = 0;
  if (uplo_arg == 'L') uplo = 1;

  info = 0;

  if (lda < MAX(1, n)) info = 9;
  if (incy == 0) info = 7;
  if (incx == 0) info = 5;
  if (n < 0) info = 2;
  if (uplo < 0) info = 1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order,
           enum CBLAS_UPLO Uplo,
           blasint n,
           FLOAT *ALPHA,
           FLOAT *x, blasint incx,
           FLOAT *y, blasint incy,
           FLOAT *a, blasint lda) {

  FLOAT alpha_r = ALPHA[0];
  FLOAT alpha_i = ALPHA[1];
  FLOAT *buffer;
  int uplo;
  blasint info;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_CNAME;

  uplo = -1;
  info =  0;

  if (order == CblasColMajor) {

    if (Uplo == CblasUpper) uplo = 0;
    if (Uplo == CblasLower) uplo = 1;

    /* -1 means "no error" for the CBLAS path (checked with info >= 0). */
    info = -1;

    if (lda < MAX(1, n)) info = 9;
    if (incy == 0) info = 7;
    if (incx == 0) info = 5;
    if (n < 0) info = 2;
    if (uplo < 0) info = 1;
  }

  if (order == CblasRowMajor) {

    if (Uplo == CblasUpper) uplo = 3;
    if (Uplo == CblasLower) uplo = 2;

    info = -1;

    /* NOTE(review): the codes for incx and incy are exchanged here relative
       to the column-major branch (incx -> 7, incy -> 5).  Presumably this
       mirrors an x/y swap for row-major input -- confirm before changing. */
    if (lda < MAX(1, n)) info = 9;
    if (incx == 0) info = 7;
    if (incy == 0) info = 5;
    if (n < 0) info = 2;
    if (uplo < 0) info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  /* Quick returns: empty problem or alpha == 0. */
  if (n == 0) return;

  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* For negative strides move the base pointer to the other end of the
     vector; the factor 2 accounts for two FLOATs per element. */
  if (incx < 0 ) x -= (n - 1) * incx * 2;
  if (incy < 0 ) y -= (n - 1) * incy * 2;

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
  nthreads = num_cpu_avail(2);

  if (nthreads == 1) {
#endif

    (her2[uplo])(n, alpha_r, alpha_i, x, incx, y, incy, a, lda, buffer);

#ifdef SMP
  } else {

    (her2_thread[uplo])(n, ALPHA, x, incx, y, incy, a, lda, buffer, nthreads);

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n);

  IDEBUG_END;

  return;
}
/*
 * Interface wrapper for the kernels in the tpsv[] table (the matrix is passed
 * as a bare pointer `a` without a leading dimension).  Without CBLAS this
 * builds the Fortran-style entry point NAME (arguments by reference); with
 * CBLAS it builds the C entry point CNAME (scalars by value plus an `order`
 * flag).
 *
 * Kernel index: (trans << 2) | (uplo << 1) | unit, where
 *   trans: 0 = N/R, 1 = T/C     uplo: 0 = upper, 1 = lower
 *   unit : 0 = unit diagonal, 1 = non-unit diagonal.
 * Row-major input flips both uplo and trans.
 *
 * Arguments are validated and reported through xerbla; the lowest-numbered
 * failing argument wins because later checks overwrite earlier ones.
 */
#ifndef CBLAS

void NAME(char *UPLO, char *TRANS, char *DIAG,
          blasint *N, FLOAT *a, FLOAT *x, blasint *INCX){

  char uplo_arg  = *UPLO;
  char trans_arg = *TRANS;
  char diag_arg  = *DIAG;

  blasint n = *N;
  blasint incx = *INCX;

  blasint info;
  int uplo;
  int unit;
  int trans;
  FLOAT *buffer;

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_arg);
  TOUPPER(trans_arg);
  TOUPPER(diag_arg);

  trans = -1;
  unit  = -1;
  uplo  = -1;

  if (trans_arg == 'N') trans = 0;
  if (trans_arg == 'T') trans = 1;
  if (trans_arg == 'R') trans = 0;
  if (trans_arg == 'C') trans = 1;

  if (diag_arg == 'U') unit = 0;
  if (diag_arg == 'N') unit = 1;

  if (uplo_arg == 'U') uplo = 0;
  if (uplo_arg == 'L') uplo = 1;

  info = 0;

  if (incx == 0) info = 7;
  if (n < 0) info = 4;
  if (unit < 0) info = 3;
  if (trans < 0) info = 2;
  if (uplo < 0) info = 1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
           blasint n, FLOAT *a, FLOAT *x, blasint incx) {

  int trans, uplo, unit;
  blasint info;
  FLOAT *buffer;

  PRINT_DEBUG_CNAME;

  unit  = -1;
  uplo  = -1;
  trans = -1;
  info  =  0;

  if (order == CblasColMajor) {
    if (Uplo == CblasUpper) uplo = 0;
    if (Uplo == CblasLower) uplo = 1;

    if (TransA == CblasNoTrans)     trans = 0;
    if (TransA == CblasTrans)       trans = 1;
    if (TransA == CblasConjNoTrans) trans = 0;
    if (TransA == CblasConjTrans)   trans = 1;

    if (Diag == CblasUnit)    unit = 0;
    if (Diag == CblasNonUnit) unit = 1;

    /* -1 means "no error" for the CBLAS path (checked with info >= 0). */
    info = -1;

    if (incx == 0) info = 7;
    if (n < 0) info = 4;
    if (unit < 0) info = 3;
    if (trans < 0) info = 2;
    if (uplo < 0) info = 1;
  }

  if (order == CblasRowMajor) {
    /* Row-major: flip both the triangle and the transpose selector. */
    if (Uplo == CblasUpper) uplo = 1;
    if (Uplo == CblasLower) uplo = 0;

    if (TransA == CblasNoTrans)     trans = 1;
    if (TransA == CblasTrans)       trans = 0;
    if (TransA == CblasConjNoTrans) trans = 1;
    if (TransA == CblasConjTrans)   trans = 0;

    if (Diag == CblasUnit)    unit = 0;
    if (Diag == CblasNonUnit) unit = 1;

    info = -1;

    if (incx == 0) info = 7;
    if (n < 0) info = 4;
    if (unit < 0) info = 3;
    if (trans < 0) info = 2;
    if (uplo < 0) info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  /* Quick return for an empty system. */
  if (n == 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* For a negative stride move the base pointer to the other end of x. */
  if (incx < 0 ) x -= (n - 1) * incx;

  buffer = (FLOAT *)blas_memory_alloc(1);

  (tpsv[(trans<<2) | (uplo<<1) | unit])(n, a, x, incx, buffer);

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(1, n * n / 2 + n, n * n);

  IDEBUG_END;

  return;
}
/*
 * Interface wrapper for the complex triangular matrix-vector kernels in the
 * trmv[] / trmv_thread[] tables.  Without CBLAS this builds the Fortran-style
 * entry point NAME (arguments by reference); with CBLAS it builds the C entry
 * point CNAME (scalars by value plus an `order` flag).
 *
 * Kernel index: (trans << 2) | (uplo << 1) | unit, where
 *   trans: 0 = N, 1 = T, 2 = R, 3 = C     uplo: 0 = upper, 1 = lower
 *   unit : 0 = unit diagonal, 1 = non-unit diagonal.
 * Row-major input flips uplo and exchanges N<->T and R<->C.
 *
 * Arguments are validated and reported through xerbla; the lowest-numbered
 * failing argument wins because later checks overwrite earlier ones.
 */
#ifndef CBLAS

void NAME(char *UPLO, char *TRANS, char *DIAG,
          blasint *N, FLOAT *a, blasint *LDA, FLOAT *x, blasint *INCX){

  char uplo_arg  = *UPLO;
  char trans_arg = *TRANS;
  char diag_arg  = *DIAG;

  blasint n = *N;
  blasint lda = *LDA;
  blasint incx = *INCX;

  blasint info;
  int uplo;
  int unit;
  int trans, buffer_size;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_arg);
  TOUPPER(trans_arg);
  TOUPPER(diag_arg);

  trans = -1;
  unit  = -1;
  uplo  = -1;

  if (trans_arg == 'N') trans = 0;
  if (trans_arg == 'T') trans = 1;
  if (trans_arg == 'R') trans = 2;
  if (trans_arg == 'C') trans = 3;

  if (diag_arg == 'U') unit = 0;
  if (diag_arg == 'N') unit = 1;

  if (uplo_arg == 'U') uplo = 0;
  if (uplo_arg == 'L') uplo = 1;

  info = 0;

  if (incx == 0) info = 8;
  if (lda < MAX(1, n)) info = 6;
  if (n < 0) info = 4;
  if (unit < 0) info = 3;
  if (trans < 0) info = 2;
  if (uplo < 0) info = 1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
           enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
           blasint n, FLOAT *a, blasint lda, FLOAT *x, blasint incx) {

  int trans, uplo, unit, buffer_size;
  blasint info;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_CNAME;

  unit  = -1;
  uplo  = -1;
  trans = -1;
  info  =  0;

  if (order == CblasColMajor) {
    if (Uplo == CblasUpper) uplo = 0;
    if (Uplo == CblasLower) uplo = 1;

    if (TransA == CblasNoTrans)     trans = 0;
    if (TransA == CblasTrans)       trans = 1;
    if (TransA == CblasConjNoTrans) trans = 2;
    if (TransA == CblasConjTrans)   trans = 3;

    if (Diag == CblasUnit)    unit = 0;
    if (Diag == CblasNonUnit) unit = 1;

    /* -1 means "no error" for the CBLAS path (checked with info >= 0). */
    info = -1;

    if (incx == 0) info = 8;
    if (lda < MAX(1, n)) info = 6;
    if (n < 0) info = 4;
    if (unit < 0) info = 3;
    if (trans < 0) info = 2;
    if (uplo < 0) info = 1;
  }

  if (order == CblasRowMajor) {
    /* Row-major: flip the triangle and exchange N<->T, R<->C. */
    if (Uplo == CblasUpper) uplo = 1;
    if (Uplo == CblasLower) uplo = 0;

    if (TransA == CblasNoTrans)     trans = 1;
    if (TransA == CblasTrans)       trans = 0;
    if (TransA == CblasConjNoTrans) trans = 3;
    if (TransA == CblasConjTrans)   trans = 2;

    if (Diag == CblasUnit)    unit = 0;
    if (Diag == CblasNonUnit) unit = 1;

    info = -1;

    if (incx == 0) info = 8;
    if (lda < MAX(1, n)) info = 6;
    if (n < 0) info = 4;
    if (unit < 0) info = 3;
    if (trans < 0) info = 2;
    if (uplo < 0) info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  /* Quick return for an empty matrix. */
  if (n == 0) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* For a negative stride move the base pointer to the other end of x; the
     factor 2 accounts for two FLOATs per element. */
  if (incx < 0 ) x -= (n - 1) * incx * 2;

#ifdef SMP
  /* Calibrated on a Xeon E5-2630 */
  if(1L * n * n > 36L * sizeof(FLOAT) * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD) {
    nthreads = num_cpu_avail(2);
    /* Mid-sized problems are capped at two threads. */
    if(nthreads > 2 && 1L * n * n < 64L * sizeof(FLOAT) * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD)
      nthreads = 2;
  } else
    nthreads = 1;

  if(nthreads > 1) {
    buffer_size = n > 16 ? 0 : n * 4 + 40;
  }
  else
#endif
  {
    /* Single-threaded scratch size; extra room for x when it is strided. */
    buffer_size = ((n - 1) / DTB_ENTRIES) * 2 * DTB_ENTRIES + 32 / sizeof(FLOAT);
    if(incx != 1)
      buffer_size += n * 2;
  }
  STACK_ALLOC(buffer_size, FLOAT, buffer);

#ifdef SMP
  if (nthreads == 1) {
#endif

    (trmv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer);

#ifdef SMP
  } else {

    (trmv_thread[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer, nthreads);

  }
#endif

  STACK_FREE(buffer);

  FUNCTION_PROFILE_END(4, n * n / 2 + n, n * n);

  IDEBUG_END;

  return;
}
/*
 * Fortran-style entry point that dispatches to the trti2[] kernel table,
 * indexed by (uplo << 1) | diag with uplo: 0 = 'U', 1 = 'L' and
 * diag: 0 = 'U', 1 = 'N'.
 *
 * On an invalid argument xerbla is called and *Info is set to the negated
 * argument position.  Otherwise *Info receives the kernel's return value
 * (or 0 when n <= 0).  The function itself always returns 0.
 */
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){

  blas_arg_t args;

  blasint uplo_ch = *UPLO;
  blasint diag_ch = *DIAG;
  blasint uplo, diag;
  blasint info;
  FLOAT *buffer;
#ifdef PPC440
  extern
#endif
  FLOAT *sa, *sb;

  PRINT_DEBUG_NAME;

  args.n   = *N;
  args.a   = (void *)a;
  args.lda = *ldA;

  TOUPPER(uplo_ch);
  TOUPPER(diag_ch);

  /* Decode the selectors; -1 marks an unrecognised character. */
  uplo = (uplo_ch == 'U') ? 0 : (uplo_ch == 'L') ? 1 : -1;
  diag = (diag_ch == 'U') ? 0 : (diag_ch == 'N') ? 1 : -1;

  /* Report the lowest-numbered invalid argument. */
  if      (uplo < 0)                   info = 1;
  else if (diag < 0)                   info = 2;
  else if (args.n < 0)                 info = 3;
  else if (args.lda < MAX(1,args.n))   info = 5;
  else                                 info = 0;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    *Info = - info;
    return 0;
  }

  *Info = 0;

  /* Nothing to do for an empty matrix. */
  if (args.n <= 0) return 0;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

#ifndef PPC440
  /* Carve the two work areas sa and sb out of a single allocation. */
  buffer = (FLOAT *)blas_memory_alloc(1);

  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif

  info = (trti2[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0);

  *Info = info;

#ifndef PPC440
  blas_memory_free(buffer);
#endif

  FUNCTION_PROFILE_END(1, .5 * args.n * args.n,
                       2. * args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
                       + 6. * args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.)));

  IDEBUG_END;

  return 0;
}
/*
 * Fortran-style entry point for the complex symmetric matrix-vector kernels
 * SYMV_U / SYMV_L (and SYMV_THREAD_U / SYMV_THREAD_L when SMP is defined).
 *
 * Steps: validate the arguments (reporting through xerbla), scale c by beta
 * unless beta == 1 + 0i, return early when alpha == 0, otherwise run the
 * kernel selected by UPLO ('U' -> index 0, 'L' -> index 1).
 */
void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
          FLOAT *b, blasint *INCX, FLOAT *BETA, FLOAT *c, blasint *INCY){

  char uplo_ch    = *UPLO;
  blasint n       = *N;
  FLOAT alpha_r   = ALPHA[0];
  FLOAT alpha_i   = ALPHA[1];
  blasint lda     = *LDA;
  blasint incx    = *INCX;
  FLOAT beta_r    = BETA[0];
  FLOAT beta_i    = BETA[1];
  blasint incy    = *INCY;

  int (*symv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
    SYMV_U, SYMV_L,
  };

#ifdef SMP
  int (*symv_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
    SYMV_THREAD_U, SYMV_THREAD_L,
  };
#endif

  blasint info;
  int uplo;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_NAME;

  TOUPPER(uplo_ch);

  /* -1 marks an unrecognised UPLO character. */
  uplo = (uplo_ch == 'U') ? 0 : (uplo_ch == 'L') ? 1 : -1;

  /* Report the lowest-numbered invalid argument. */
  if      (uplo < 0)           info = 1;
  else if (n < 0)              info = 2;
  else if (lda < MAX(1, n))    info = 5;
  else if (incx == 0)          info = 7;
  else if (incy == 0)          info = 10;
  else                         info = 0;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

  if (n == 0) return;

  /* c := beta * c */
  if ((beta_r != ONE) || (beta_i != ZERO)) {
    SCAL_K(n, 0, 0, beta_r, beta_i, c, abs(incy), NULL, 0, NULL, 0);
  }

  /* With alpha == 0 the scaled c is already the result. */
  if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

  /* For negative strides move the base pointers to the other end. */
  if (incx < 0 ) b -= (n - 1) * incx * COMPSIZE;
  if (incy < 0 ) c -= (n - 1) * incy * COMPSIZE;

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
  nthreads = num_cpu_avail(2);

  if (nthreads == 1) {
#endif

    (symv[uplo])(n, n, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);

#ifdef SMP
  } else {

    (symv_thread[uplo])(n, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);

  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n);

  IDEBUG_END;

  return;
}
/*
 * Fortran-style entry point that dispatches to lauum_single[] (one thread) or
 * lauum_parallel[] (several threads, SMP builds only), indexed by
 * uplo: 0 = 'U', 1 = 'L'.
 *
 * On an invalid argument xerbla is called and *Info is set to the negated
 * argument position.  Otherwise *Info receives the kernel's return value
 * (or 0 when n == 0).  The function itself always returns 0.
 */
int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){

  blas_arg_t args;

  blasint uplo_ch = *UPLO;
  blasint uplo;
  blasint info;
  FLOAT *buffer;
#ifdef PPC440
  extern
#endif
  FLOAT *sa, *sb;

  PRINT_DEBUG_NAME;

  args.n   = *N;
  args.a   = (void *)a;
  args.lda = *ldA;

  TOUPPER(uplo_ch);

  /* -1 marks an unrecognised UPLO character. */
  uplo = (uplo_ch == 'U') ? 0 : (uplo_ch == 'L') ? 1 : -1;

  /* Report the lowest-numbered invalid argument. */
  if      (uplo < 0)                   info = 1;
  else if (args.n < 0)                 info = 2;
  else if (args.lda < MAX(1,args.n))   info = 4;
  else                                 info = 0;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    *Info = - info;
    return 0;
  }

  *Info = 0;

  /* Nothing to do for an empty matrix. */
  if (args.n == 0) return 0;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

#ifndef PPC440
  /* Carve the two work areas sa and sb out of a single allocation. */
  buffer = (FLOAT *)blas_memory_alloc(1);

  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif

#ifdef SMP
  args.common = NULL;
  args.nthreads = num_cpu_avail(4);

  if (args.nthreads == 1) {
#endif

    *Info = (lauum_single[uplo])(&args, NULL, NULL, sa, sb, 0);

#ifdef SMP
  } else {

    *Info = (lauum_parallel[uplo])(&args, NULL, NULL, sa, sb, 0);

  }
#endif

#ifndef PPC440
  blas_memory_free(buffer);
#endif

  FUNCTION_PROFILE_END(1, .5 * args.n * args.n,
                       2. * args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
                       + args.n * (args.n * args.n - 1));

  IDEBUG_END;

  return 0;
}
/*
 * Real vector scaling wrapper around SCAL_K.  Without CBLAS this builds the
 * Fortran-style entry point NAME (arguments by reference); with CBLAS it
 * builds the C entry point CNAME (n, alpha and incx by value).
 *
 * Returns immediately when incx <= 0, n <= 0 or alpha == ONE.  Otherwise the
 * kernel is called directly, or through blas_level1_thread when an SMP build
 * reports more than one available thread.
 */
#ifndef CBLAS

void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX){

  blasint n    = *N;
  blasint incx = *INCX;
  FLOAT alpha  = *ALPHA;

#else

void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){

#endif

#ifdef SMP
  int mode, nthreads;
#endif

#ifndef CBLAS
  PRINT_DEBUG_NAME;
#else
  PRINT_DEBUG_CNAME;
#endif

  if (incx <= 0 || n <= 0) return;

  /* Scaling by one leaves x unchanged. */
  if (alpha == ONE) return;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

#ifdef SMP
  nthreads = num_cpu_avail(1);

  if (nthreads == 1) {
#endif

    SCAL_K(n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0);

#ifdef SMP
  } else {

#ifdef DOUBLE
    mode  =  BLAS_DOUBLE | BLAS_REAL;
#else
    mode  =  BLAS_SINGLE | BLAS_REAL;
#endif

    /* The threaded driver takes alpha by address: the caller's pointer in
       the Fortran build, the address of the by-value copy in the C build. */
    blas_level1_thread(mode, n, 0, 0,
#ifndef CBLAS
                       ALPHA,
#else
                       &alpha,
#endif
                       x, incx, NULL, 0, NULL, 0, (void *)SCAL_K, nthreads);

  }
#endif

  FUNCTION_PROFILE_END(1, n, n);

  IDEBUG_END;

  return;

}