void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ blasint n = *N; blasint incx = *INCX; blasint incy = *INCY; #else void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ #endif #ifdef SMP int mode; FLOAT dummyalpha[2] = {ZERO, ZERO}; int nthreads; #endif #ifndef CBLAS PRINT_DEBUG_NAME; #else PRINT_DEBUG_CNAME; #endif if (n <= 0) return; IDEBUG_START; FUNCTION_PROFILE_START(); if (incx < 0) x -= (n - 1) * incx * 2; if (incy < 0) y -= (n - 1) * incy * 2; #ifdef SMP nthreads = num_cpu_avail(1); if (nthreads == 1) { #endif SWAP_K(n, 0, 0, ZERO, ZERO, x, incx, y, incy, NULL, 0); #ifdef SMP } else { #ifdef XDOUBLE mode = BLAS_XDOUBLE | BLAS_COMPLEX; #elif defined(DOUBLE) mode = BLAS_DOUBLE | BLAS_COMPLEX; #else mode = BLAS_SINGLE | BLAS_COMPLEX; #endif blas_level1_thread(mode, n, 0, 0, dummyalpha, x, incx, y, incy, NULL, 0, (void *)SWAP_K, nthreads); } #endif FUNCTION_PROFILE_END(2, 2 * n, 0); IDEBUG_END; return; }
/*
 * Column-by-column factorization kernel with row pivoting for a matrix
 * stored with one FLOAT per entry and leading dimension lda.
 * NOTE(review): the sequence of kernel calls matches an unblocked LU
 * factorization with partial pivoting; the semantics of DOTU_K, GEMV_N,
 * IAMAX_K, SWAP_K and SCAL_K are assumed from their names -- confirm.
 *
 *   args     supplies m, n, a, lda and the pivot array (args->c, read as
 *            blasint *).
 *   range_m  not referenced.
 *   range_n  when non-NULL, only columns range_n[0] .. range_n[1]-1 are
 *            processed: `a` is moved range_n[0] steps along the diagonal,
 *            m is reduced by range_n[0], and pivot indices are shifted by
 *            range_n[0].
 *   sa       not referenced.
 *   sb       passed to GEMV_N as its last argument.
 *   myid     not referenced.
 *
 * Returns 0, or the 1-based position (within the processed columns) of the
 * first column whose selected pivot entry equals ZERO.
 */
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {

  /* offset is BLASLONG so that range_n[0] is stored without narrowing. */
  BLASLONG m, n, lda, offset;
  blasint *ipiv;
  FLOAT *a;
  FLOAT temp1, temp2;
  blasint i, j;
  blasint ip, jp;
  blasint info;
  BLASLONG len;
  FLOAT *b;

  m    = args -> m;
  n    = args -> n;
  a    = (FLOAT *)args -> a;
  lda  = args -> lda;
  ipiv = (blasint *)args -> c;
  offset = 0;

  if (range_n) {
    m     -= range_n[0];
    n      = range_n[1] - range_n[0];
    offset = range_n[0];
    a     += range_n[0] * (lda + 1) * COMPSIZE;
  }

  info = 0;
  b = a;   /* b always points at the top of the current column j */

  for (j = 0; j < n; j++) {

    len = MIN(j, m);

    /* Replay the row interchanges recorded for earlier columns on this
       column. Stored pivots are 1-based and include offset. */
    for (i = 0; i < len; i++) {
      ip = ipiv[i + offset] - 1 - offset;
      if (ip != i) {
        temp1 = *(b + i);
        temp2 = *(b + ip);
        *(b + i) = temp2;
        *(b + ip) = temp1;
      }
    }

    /* Update entries 1 .. len-1 of the column, each with a DOTU_K of row i
       of `a` (stride lda) against the already-updated head of the column. */
    for (i = 1; i < len; i++) {
      b[i] -= DOTU_K(i, a + i, lda, b, 1);
    }

    if (j < m) {
      /* Update rows j .. m-1 of the column with the j columns to its left
         (dm1 is presumably -1 -- confirm). */
      GEMV_N(m - j, j, 0, dm1, a + j, lda, b, 1, b + j, 1, sb);

      /* IAMAX_K's result is treated as 1-based: it is stored as-is (plus
         offset) and decremented before being used as an index. */
      jp = j + IAMAX_K(m - j, b + j, 1);
      ipiv[j + offset] = jp + offset;
      jp--;
      temp1 = *(b + jp);

      if (temp1 != ZERO) {
        temp1 = dp1 / temp1;   /* scaling factor for the sub-column */

        if (jp != j) {
          /* Exchange rows j and jp across columns 0 .. j. */
          SWAP_K(j + 1, 0, 0, ZERO, a + j, lda, a + jp, lda, NULL, 0);
        }
        if (j + 1 < m) {
          SCAL_K(m - j - 1, 0, 0, temp1, b + j + 1, 1, NULL, 0, NULL, 0);
        }
      } else {
        /* Remember only the first column with a zero pivot; keep going. */
        if (!info) info = j + 1;
      }
    }
    b += lda;
  }

  return info;
}
void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ blasint n = *N; blasint incx = *INCX; blasint incy = *INCY; #else void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ #endif #ifdef SMP int mode, nthreads; FLOAT dummyalpha[2] = {ZERO, ZERO}; #endif #ifndef CBLAS PRINT_DEBUG_NAME; #else PRINT_DEBUG_CNAME; #endif if (n <= 0) return; IDEBUG_START; FUNCTION_PROFILE_START(); if (incx < 0) x -= (n - 1) * incx; if (incy < 0) y -= (n - 1) * incy; #ifdef SMP nthreads = num_cpu_avail(1); //disable multi-thread when incx==0 or incy==0 //In that case, the threads would be dependent. if (incx == 0 || incy == 0) nthreads = 1; if (nthreads == 1) { #endif SWAP_K(n, 0, 0, ZERO, x, incx, y, incy, NULL, 0); #ifdef SMP } else { #ifdef XDOUBLE mode = BLAS_XDOUBLE | BLAS_REAL; #elif defined(DOUBLE) mode = BLAS_DOUBLE | BLAS_REAL; #else mode = BLAS_SINGLE | BLAS_REAL; #endif blas_level1_thread(mode, n, 0, 0, dummyalpha, x, incx, y, incy, NULL, 0, (void *)SWAP_K, nthreads); } #endif FUNCTION_PROFILE_END(1, 2 * n, 0); IDEBUG_END; return; }
/*
 * Column-by-column factorization kernel with row pivoting for a complex
 * matrix stored as interleaved (real, imaginary) FLOAT pairs with leading
 * dimension lda (counted in complex entries).
 * NOTE(review): the call sequence matches an unblocked LU factorization
 * with partial pivoting; the semantics of ZTRSV_NLU, GEMV_N, IAMAX_K,
 * SWAP_K and SCAL_K are assumed from their names -- confirm.
 *
 *   args     supplies m, n, a, lda and the pivot array (args->c, read as
 *            blasint *).
 *   range_m  not referenced.
 *   range_n  when non-NULL, restricts the work to columns
 *            range_n[0] .. range_n[1]-1; the matrix start moves range_n[0]
 *            steps along the diagonal, the row count shrinks by range_n[0]
 *            and pivot indices are shifted by range_n[0].
 *   sa       not referenced.
 *   sb       handed to ZTRSV_NLU and GEMV_N as their last argument.
 *   myid     not referenced.
 *
 * Returns 0, or the 1-based position (within the processed columns) of the
 * first column whose selected pivot has both parts equal to ZERO.
 */
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {

  BLASLONG rows  = args -> m;
  BLASLONG cols  = args -> n;
  BLASLONG ld    = args -> lda;
  BLASLONG shift = 0;
  blasint *pivots = (blasint *)args -> c;
  FLOAT *mat = (FLOAT *)args -> a;
  FLOAT keep_re, keep_im;
  FLOAT piv_re, piv_im;
  FLOAT inv_re, inv_im;
  FLOAT ratio, den;
  blasint r, k;
  blasint src, piv;
  blasint info = 0;
  BLASLONG done;
  FLOAT *col;

  if (range_n) {
    rows -= range_n[0];
    cols  = range_n[1] - range_n[0];
    shift = range_n[0];
    mat  += range_n[0] * (ld + 1) * COMPSIZE;
  }

  /* col tracks the top of column k; it advances one column per iteration. */
  for (k = 0, col = mat; k < cols; k++, col += ld * 2) {

    done = MIN(k, rows);

    /* Re-apply the interchanges recorded for earlier columns. Stored pivots
       are 1-based and include the shift. */
    for (r = 0; r < done; r++) {
      src = pivots[r + shift] - 1 - shift;
      if (src == r) continue;

      keep_re = col[r * 2 + 0];
      keep_im = col[r * 2 + 1];
      col[r * 2 + 0] = col[src * 2 + 0];
      col[r * 2 + 1] = col[src * 2 + 1];
      col[src * 2 + 0] = keep_re;
      col[src * 2 + 1] = keep_im;
    }

    ZTRSV_NLU(done, mat, ld, col, 1, sb);

    /* Columns past the last row have no pivot to pick. */
    if (k >= rows) continue;

    /* Update rows k .. rows-1 of this column with the k columns to its left
       (dm1 is presumably -1 -- confirm). */
    GEMV_N(rows - k, k, 0, dm1, ZERO, mat + k * 2, ld, col, 1, col + k * 2, 1, sb);

    /* IAMAX_K's result is treated as 1-based: stored as-is (plus shift),
       then decremented before being used as an index. */
    piv = k + IAMAX_K(rows - k, col + k * 2, 1);
    pivots[k + shift] = piv + shift;
    piv--;

    piv_re = col[piv * 2 + 0];
    piv_im = col[piv * 2 + 1];

    if ((piv_re == ZERO) && (piv_im == ZERO)) {
      /* Only the first column with a zero pivot is reported. */
      if (info == 0) info = k + 1;
      continue;
    }

    if (piv != k) {
      /* Exchange rows k and piv across columns 0 .. k. */
      SWAP_K(k + 1, 0, 0, ZERO, ZERO, mat + k * 2, ld, mat + piv * 2, ld, NULL, 0);
    }

    /* Reciprocal of the pivot (dp1 is presumably 1 -- confirm), dividing by
       the larger-magnitude component to limit overflow in the squares. */
    if (fabs(piv_re) >= fabs(piv_im)) {
      ratio  = piv_im / piv_re;
      den    = dp1 / (piv_re * (1 + ratio * ratio));
      inv_re = den;
      inv_im = -ratio * den;
    } else {
      ratio  = piv_re / piv_im;
      den    = dp1 / (piv_im * (1 + ratio * ratio));
      inv_re = ratio * den;
      inv_im = -den;
    }

    if (k + 1 < rows) {
      SCAL_K(rows - k - 1, 0, 0, inv_re, inv_im, col + (k + 1) * 2, 1, NULL, 0, NULL, 0);
    }
  }

  return info;
}