/*
 * Unblocked, in-place update of the n x n column-major matrix args->a
 * (leading dimension args->lda).  For every index j, in increasing order:
 *
 *   1. SCAL_K is applied to the first j + 1 entries of row j (stride lda),
 *      with the diagonal entry a[j + j*lda] as the scale factor;
 *   2. if entries remain below the diagonal, the dot product of the
 *      sub-diagonal part of column j with itself is added to the diagonal;
 *   3. GEMV_T is called with the (n-j-1) x j panel that starts at row j+1,
 *      the sub-diagonal part of column j as x, and row j (stride lda) as y,
 *      using dp1 as alpha.
 *
 * NOTE(review): this looks like the lower-triangular LAUU2 kernel
 * (L**T * L) -- confirm against the build rule that defines CNAME.
 *
 * When range_n is non-NULL the work is restricted to the diagonal
 * sub-block [range_n[0], range_n[1]).  range_m, sa and myid are not used
 * here; sb is only forwarded to GEMV_T as its buffer argument.
 * Always returns 0.
 */
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {

  FLOAT   *a   = (FLOAT *)args -> a;
  BLASLONG lda = args -> lda;
  BLASLONG n   = args -> n;
  BLASLONG j;

  if (range_n != NULL) {
    BLASLONG first = range_n[0];

    n  = range_n[1] - first;
    /* Step down the diagonal to the first row/column of the sub-block. */
    a += first * (lda + 1) * COMPSIZE;
  }

  for (j = 0; j < n; j++) {
    FLOAT   *row_j = a + j;             /* element (j, 0); stride lda walks row j */
    FLOAT   *diag  = row_j + j * lda;   /* element (j, j)                        */
    BLASLONG below = n - j - 1;         /* entries under the diagonal in column j */

    SCAL_K(j + 1, 0, 0, *diag, row_j, lda, NULL, 0, NULL, 0);

    if (below > 0) {
      FLOAT *col_below = diag + 1;
      /* Keep the dot result in a FLOAT before accumulating it. */
      FLOAT  sumsq     = DOTU_K(below, col_below, 1, col_below, 1);

      *diag += sumsq;

      GEMV_T(below, j, 0, dp1,
             a + (j + 1), lda,
             col_below,   1,
             row_j,       lda, sb);
    }
  }

  return 0;
}
/*
 * In-place triangular matrix-vector kernel for real data.  The vector is
 * processed in diagonal blocks of at most DTB_ENTRIES entries, from the
 * first block to the last.
 *
 * Compile-time switches:
 *   TRANSA  - when defined, each entry is combined (DOTU_K) with the
 *             entries below it in its column of `a`, and the rows below the
 *             block are folded in afterwards with GEMV_T.  When undefined,
 *             the columns of the block first update the entries above the
 *             block (GEMV_N) and then, column by column, the entries above
 *             the diagonal inside the block (AXPYU_K).
 *   UNIT    - when defined, the multiplication by the diagonal entry of `a`
 *             is skipped.
 *
 * m      : number of vector entries / order of the matrix.
 * a, lda : column-major matrix and its leading dimension.
 * b,incb : vector and its stride.  If incb != 1 the vector is copied into
 *          `buffer`, updated there, and copied back before returning.
 * buffer : work space; the part after the (optional) vector copy, rounded
 *          up to a 4096-byte boundary, is handed to the GEMV kernels.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *buffer){

  BLASLONG start, width, k;
  FLOAT *scratch = (FLOAT *)buffer;
  FLOAT *X = b;

  if (incb != 1) {
    X = buffer;
    scratch = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  for (start = 0; start < m; start += DTB_ENTRIES) {
    FLOAT *xblk = X + start;            /* vector entries of this block */

    width = MIN(m - start, DTB_ENTRIES);

#ifdef TRANSA
    for (k = 0; k < width; k++) {
      /* col[r] is the entry in row (start + r) of column (start + k). */
      FLOAT *col = a + start + (k + start) * lda;

#ifndef UNIT
      xblk[k] *= col[k];
#endif

      if (k < width - 1) {
        xblk[k] += DOTU_K(width - k - 1, col + k + 1, 1, xblk + k + 1, 1);
      }
    }

    /* Contribution of the rows below this diagonal block. */
    if (m - start > width) {
      GEMV_T(m - start - width, width, 0, dp1,
             a + start + width + start * lda, lda,
             X + start + width, 1,
             xblk, 1, scratch);
    }
#else
    /* Contribution of this block's columns to the entries above it. */
    if (start > 0) {
      GEMV_N(start, width, 0, dp1,
             a + start * lda, lda,
             xblk, 1,
             X, 1, scratch);
    }

    for (k = 0; k < width; k++) {
      /* col[r] is the entry in row (start + r) of column (start + k). */
      FLOAT *col = a + start + (k + start) * lda;

      if (k > 0) {
        AXPYU_K(k, 0, 0, xblk[k], col, 1, xblk, 1, NULL, 0);
      }

#ifndef UNIT
      xblk[k] *= col[k];
#endif
    }
#endif
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
/*
 * In-place triangular solve kernel for real data.  The vector is processed
 * in diagonal blocks of at most DTB_ENTRIES entries, from the last block
 * back to the first, and inside a block from the last row upwards.
 *
 * Compile-time switches:
 *   TRANSA  - when defined, the already-solved entries after the block are
 *             folded in with GEMV_T (alpha = dm1) and, inside the block,
 *             each entry has the DOTU_K of the sub-diagonal part of its
 *             column with the later entries subtracted before the divide.
 *             When undefined, each solved entry is propagated to the
 *             entries above it: inside the block with AXPYU_K, above the
 *             block with GEMV_N (alpha = dm1).
 *   UNIT    - when defined, the division by the diagonal entry is skipped.
 *
 * m      : number of vector entries / order of the matrix.
 * a, lda : column-major matrix and its leading dimension.
 * b,incb : right-hand side, overwritten with the result.  If incb != 1 the
 *          vector is copied into `buffer`, solved there, and copied back.
 * buffer : work space; the part after the (optional) vector copy, rounded
 *          up to a 4096-byte boundary, is handed to the GEMV kernels.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG end, width, k;
  FLOAT *scratch = (FLOAT *)buffer;
  FLOAT *X = b;

  if (incb != 1) {
    X = buffer;
    scratch = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  /* `end` is one past the last index of the current block. */
  for (end = m; end > 0; end -= DTB_ENTRIES) {

    width = MIN(end, DTB_ENTRIES);

#ifdef TRANSA
    /* Fold the entries after this block into the block's entries. */
    if (m - end > 0) {
      GEMV_T(m - end, width, 0, dm1,
             a + end + (end - width) * lda, lda,
             X + end, 1,
             X + end - width, 1, scratch);
    }

    for (k = 0; k < width; k++) {
      BLASLONG row  = end - k - 1;
      FLOAT   *diag = a + row + row * lda;
      FLOAT   *xr   = X + row;

      if (k > 0) {
        xr[0] -= DOTU_K(k, diag + 1, 1, xr + 1, 1);
      }

#ifndef UNIT
      xr[0] /= diag[0];
#endif
    }
#else
    for (k = 0; k < width; k++) {
      BLASLONG row   = end - k - 1;
      BLASLONG above = width - k - 1;   /* rows of this block above `row` */
      FLOAT   *diag  = a + row + row * lda;
      FLOAT   *xr    = X + row;

#ifndef UNIT
      xr[0] /= diag[0];
#endif

      if (above > 0) {
        AXPYU_K(above, 0, 0, -xr[0],
                diag - above, 1,
                xr - above, 1, NULL, 0);
      }
    }

    /* Propagate this block's results to every entry above the block. */
    if (end - width > 0) {
      GEMV_N(end - width, width, 0, dm1,
             a + (end - width) * lda, lda,
             X + end - width, 1,
             X, 1, scratch);
    }
#endif
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
/*
 * In-place triangular solve kernel for complex data (interleaved
 * real/imaginary FLOAT pairs).  The vector is processed in diagonal blocks
 * of at most DTB_ENTRIES entries, from the last block back to the first,
 * and inside a block from the last row upwards.
 *
 * Compile-time switches:
 *   TRANSA == 1 / 3 : each solved entry is propagated to the entries above
 *                     it, inside the block with AXPYU_K (1) or AXPYC_K (3),
 *                     above the block with GEMV_N (1) or GEMV_R (3).
 *   TRANSA == 2 / 4 : the entries after the block are folded in with
 *                     GEMV_T (2) or GEMV_C (4); inside the block the
 *                     DOTU_K (2) or DOTC_K (4) of the sub-diagonal part of
 *                     the column with the later entries is subtracted.
 *   TRANSA <  3     : the entry is multiplied by 1 / d, otherwise by
 *                     1 / conj(d), where d is the diagonal entry.
 *   UNIT            : when defined, the diagonal step is skipped.
 *
 * m      : number of vector entries / order of the matrix.
 * a, lda : column-major matrix and its leading dimension (in elements).
 * b,incb : right-hand side, overwritten with the result.  If incb != 1 the
 *          vector is copied into `buffer`, solved there, and copied back.
 * buffer : work space; the part after the (optional) vector copy, rounded
 *          up to a 4096-byte boundary, is handed to the GEMV kernels.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
  /* Use the project's complex return type (the one the CREAL/CIMAG
     accessors are applied to elsewhere) instead of the raw C99 `_Complex`
     keyword, which is not available on every supported compiler. */
  OPENBLAS_COMPLEX_FLOAT result;
#endif
#ifndef UNIT
  FLOAT ar, ai, br, bi, ratio, den;
#endif
  FLOAT *gemvbuffer = (FLOAT *)buffer;
  FLOAT *B = b;

  if (incb != 1) {
    B = buffer;
    gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  /* `is` is one past the last index of the current diagonal block. */
  for (is = m; is > 0; is -= DTB_ENTRIES){

    min_i = MIN(is, DTB_ENTRIES);

#if (TRANSA == 2) || (TRANSA == 4)
    /* Fold the entries after this block into the block's entries. */
    if (m - is > 0){
#if TRANSA == 2
      GEMV_T(m - is, min_i, 0, dm1, ZERO,
             a + (is + (is - min_i) * lda) * COMPSIZE, lda,
             B + is * COMPSIZE, 1,
             B + (is - min_i) * COMPSIZE, 1, gemvbuffer);
#else
      GEMV_C(m - is, min_i, 0, dm1, ZERO,
             a + (is + (is - min_i) * lda) * COMPSIZE, lda,
             B + is * COMPSIZE, 1,
             B + (is - min_i) * COMPSIZE, 1, gemvbuffer);
#endif
    }
#endif

    for (i = 0; i < min_i; i++) {
      /* AA: diagonal entry of row (is - i - 1); BB: matching vector entry. */
      FLOAT *AA = a + ((is - i - 1) + (is - i - 1) * lda) * COMPSIZE;
      FLOAT *BB = B + (is - i - 1) * COMPSIZE;

#if (TRANSA == 2) || (TRANSA == 4)
      if (i > 0) {
#if TRANSA == 2
        result = DOTU_K(i, AA + 2, 1, BB + 2, 1);
#else
        result = DOTC_K(i, AA + 2, 1, BB + 2, 1);
#endif

        BB[0] -= CREAL(result);
        BB[1] -= CIMAG(result);
      }
#endif

#ifndef UNIT
      ar = AA[0];
      ai = AA[1];

      /* Reciprocal of the diagonal entry, dividing by the component of
         larger magnitude first; (ar, ai) ends up holding the reciprocal
         (or the reciprocal of the conjugate when TRANSA >= 3). */
      if (fabs(ar) >= fabs(ai)){
        ratio = ai / ar;
        den = 1./(ar * ( 1 + ratio * ratio));

        ar =  den;
#if TRANSA < 3
        ai = -ratio * den;
#else
        ai =  ratio * den;
#endif
      } else {
        ratio = ar / ai;
        den = 1./(ai * ( 1 + ratio * ratio));
        ar =  ratio * den;
#if TRANSA < 3
        ai = -den;
#else
        ai =  den;
#endif
      }

      br = BB[0];
      bi = BB[1];

      BB[0] = ar*br - ai*bi;
      BB[1] = ar*bi + ai*br;
#endif

#if (TRANSA == 1) || (TRANSA == 3)
      /* Propagate the solved entry to the block rows above it. */
      if (i < min_i - 1) {
#if TRANSA == 1
        AXPYU_K(min_i - i - 1, 0, 0, - BB[0], -BB[1],
                AA - (min_i - i - 1) * COMPSIZE, 1,
                BB - (min_i - i - 1) * COMPSIZE, 1, NULL, 0);
#else
        AXPYC_K(min_i - i - 1, 0, 0, - BB[0], -BB[1],
                AA - (min_i - i - 1) * COMPSIZE, 1,
                BB - (min_i - i - 1) * COMPSIZE, 1, NULL, 0);
#endif
      }
#endif
    }

#if (TRANSA == 1) || (TRANSA == 3)
    /* Propagate this block's results to every entry above the block. */
    if (is - min_i > 0){
#if TRANSA == 1
      GEMV_N(is - min_i, min_i, 0, dm1, ZERO,
             a + (is - min_i) * lda * COMPSIZE, lda,
             B + (is - min_i) * COMPSIZE, 1,
             B, 1, gemvbuffer);
#else
      GEMV_R(is - min_i, min_i, 0, dm1, ZERO,
             a + (is - min_i) * lda * COMPSIZE, lda,
             B + (is - min_i) * COMPSIZE, 1,
             B, 1, gemvbuffer);
#endif
    }
#endif
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
/*
 * In-place triangular matrix-vector kernel for complex data (interleaved
 * real/imaginary FLOAT pairs).  The vector is processed in diagonal blocks
 * of at most DTB_ENTRIES entries, from the first block to the last.
 *
 * Compile-time switches:
 *   TRANSA == 1 / 3 : the block's columns first update the entries above
 *                     the block with GEMV_N (1) or GEMV_R (3), then, column
 *                     by column, the entries above the diagonal inside the
 *                     block with AXPYU_K (1) or AXPYC_K (3).
 *   TRANSA == 2 / 4 : each entry gets the DOTU_K (2) or DOTC_K (4) of the
 *                     sub-diagonal part of its column with the later block
 *                     entries added; rows below the block are folded in
 *                     afterwards with GEMV_T (2) or GEMV_C (4).
 *   TRANSA == 1 / 2 : the diagonal step multiplies by d, every other value
 *                     multiplies by conj(d), d being the diagonal entry.
 *   UNIT            : when defined, the diagonal step is skipped.
 *
 * m      : number of vector entries / order of the matrix.
 * a, lda : column-major matrix and its leading dimension (in elements).
 * b,incb : vector and its stride.  If incb != 1 the vector is copied into
 *          `buffer`, updated there, and copied back before returning.
 * buffer : work space; the part after the (optional) vector copy, rounded
 *          up to a 4096-byte boundary, is handed to the GEMV kernels.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *buffer) {

  BLASLONG start, width, k;
#if (TRANSA == 2) || (TRANSA == 4)
  OPENBLAS_COMPLEX_FLOAT dot;
#endif
#ifndef UNIT
  FLOAT d_re, d_im, x_re, x_im;
#endif
  FLOAT *scratch = (FLOAT *)buffer;
  FLOAT *X = b;

  if (incb != 1) {
    X = buffer;
    scratch = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  for (start = 0; start < m; start += DTB_ENTRIES) {
    FLOAT *xblk = X + start * 2;        /* vector entries of this block */

    width = MIN(m - start, DTB_ENTRIES);

#if (TRANSA == 1) || (TRANSA == 3)
    /* Contribution of this block's columns to the entries above it. */
    if (start > 0) {
#if TRANSA == 1
      GEMV_N(start, width, 0, dp1, ZERO,
             a + start * lda * 2, lda,
             xblk, 1,
             X, 1, scratch);
#else
      GEMV_R(start, width, 0, dp1, ZERO,
             a + start * lda * 2, lda,
             xblk, 1,
             X, 1, scratch);
#endif
    }
#endif

    for (k = 0; k < width; k++) {
      /* col: row `start` of column (start + k); xk: k-th entry of the block. */
      FLOAT *col = a + (start + (k + start) * lda) * 2;
      FLOAT *xk  = xblk + k * 2;

#if (TRANSA == 1) || (TRANSA == 3)
      if (k > 0) {
#if TRANSA == 1
        AXPYU_K(k, 0, 0, xk[0], xk[1], col, 1, xblk, 1, NULL, 0);
#else
        AXPYC_K(k, 0, 0, xk[0], xk[1], col, 1, xblk, 1, NULL, 0);
#endif
      }
#endif

#ifndef UNIT
      d_re = col[k * 2 + 0];
      d_im = col[k * 2 + 1];
      x_re = xk[0];
      x_im = xk[1];

#if (TRANSA == 1) || (TRANSA == 2)
      xk[0] = d_re * x_re - d_im * x_im;
      xk[1] = d_re * x_im + d_im * x_re;
#else
      xk[0] = d_re * x_re + d_im * x_im;
      xk[1] = d_re * x_im - d_im * x_re;
#endif
#endif

#if (TRANSA == 2) || (TRANSA == 4)
      if (k < width - 1) {
#if TRANSA == 2
        dot = DOTU_K(width - k - 1, col + (k + 1) * 2, 1, xk + 2, 1);
#else
        dot = DOTC_K(width - k - 1, col + (k + 1) * 2, 1, xk + 2, 1);
#endif

        xk[0] += CREAL(dot);
        xk[1] += CIMAG(dot);
      }
#endif
    }

#if (TRANSA == 2) || (TRANSA == 4)
    /* Contribution of the rows below this diagonal block. */
    if (m - start > width) {
#if TRANSA == 2
      GEMV_T(m - start - width, width, 0, dp1, ZERO,
             a + (start + width + start * lda) * 2, lda,
             X + (start + width) * 2, 1,
             xblk, 1, scratch);
#else
      GEMV_C(m - start - width, width, 0, dp1, ZERO,
             a + (start + width + start * lda) * 2, lda,
             X + (start + width) * 2, 1,
             xblk, 1, scratch);
#endif
    }
#endif
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
/*
 * Blocked matrix-vector kernel for complex data that reads only one
 * triangle of `a`.  Diagonal blocks of at most SYMV_P columns are expanded
 * into a dense min_i x min_i block in `buffer` by ZSYMCOPY_U / ZSYMCOPY_L
 * and applied with GEMV_N; the off-diagonal panel of each block is applied
 * twice, once with GEMV_T and once with GEMV_N.
 * NOTE(review): this looks like the complex symmetric y += alpha * A * x
 * kernel -- confirm against the build rule that defines CNAME.
 *
 * Compile-time switch LOWER:
 *   undefined : columns [m - offset, m) are processed; the panel is the
 *               rows above each diagonal block.
 *   defined   : columns [0, offset) are processed; the panel is the rows
 *               below each diagonal block.
 *
 * m               : order of the matrix / length of x and y.
 * offset          : number of columns to process (see LOWER above).
 * alpha_r, alpha_i: real and imaginary part of the scalar passed to GEMV.
 * a, lda          : column-major matrix and its leading dimension.
 * x, incx         : input vector; copied into the work space if incx != 1.
 * y, incy         : vector updated in place; if incy != 1 it is copied
 *                   into the work space, updated there, and copied back.
 * buffer          : work space laid out as [dense diagonal block]
 *                   [copy of y, if any][copy of x, if any][GEMV scratch],
 *                   each region starting on a 4096-byte boundary after the
 *                   previous one.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){

  BLASLONG pos, width;
  BLASLONG first, limit;
  FLOAT *X = x;
  FLOAT *Y = y;
  FLOAT *dense = buffer;
  /* Next free, 4096-byte aligned region after the dense diagonal block. */
  FLOAT *work = (FLOAT *)(((BLASLONG)buffer + SYMV_P * SYMV_P * sizeof(FLOAT) * 2 + 4095) & ~4095);

  if (incy != 1) {
    Y    = work;
    work = (FLOAT *)(((BLASLONG)Y + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, y, incy, Y, 1);
  }

  if (incx != 1) {
    X    = work;
    work = (FLOAT *)(((BLASLONG)X + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, x, incx, X, 1);
  }

#ifndef LOWER
  first = m - offset;
  limit = m;
#else
  first = 0;
  limit = offset;
#endif

  for (pos = first; pos < limit; pos += SYMV_P) {

    width = MIN(limit - pos, SYMV_P);

#ifndef LOWER
    /* Panel above the diagonal block: rows [0, pos). */
    if (pos > 0) {
      GEMV_T(pos, width, 0, alpha_r, alpha_i,
             a + pos * lda * COMPSIZE, lda,
             X, 1,
             Y + pos * COMPSIZE, 1, work);

      GEMV_N(pos, width, 0, alpha_r, alpha_i,
             a + pos * lda * COMPSIZE, lda,
             X + pos * COMPSIZE, 1,
             Y, 1, work);
    }

    ZSYMCOPY_U(width, a + (pos + pos * lda) * COMPSIZE, lda, dense);
#else
    ZSYMCOPY_L(width, a + (pos + pos * lda) * COMPSIZE, lda, dense);
#endif

    /* Diagonal block, now stored densely with leading dimension `width`. */
    GEMV_N(width, width, 0, alpha_r, alpha_i,
           dense, width,
           X + pos * COMPSIZE, 1,
           Y + pos * COMPSIZE, 1, work);

#ifdef LOWER
    /* Panel below the diagonal block: rows [pos + width, m). */
    if (m - pos > width) {
      GEMV_T(m - pos - width, width, 0, alpha_r, alpha_i,
             a + ((pos + width) + pos * lda) * COMPSIZE, lda,
             X + (pos + width) * COMPSIZE, 1,
             Y + pos * COMPSIZE, 1, work);

      GEMV_N(m - pos - width, width, 0, alpha_r, alpha_i,
             a + ((pos + width) + pos * lda) * COMPSIZE, lda,
             X + pos * COMPSIZE, 1,
             Y + (pos + width) * COMPSIZE, 1, work);
    }
#endif
  }

  if (incy != 1) {
    COPY_K(m, Y, 1, y, incy);
  }

  return 0;
}