/*
 * Unblocked, in-place factorization kernel working on the n diagonal
 * entries of the complex matrix args->a (interleaved real/imaginary FLOATs,
 * leading dimension args->lda).
 * NOTE(review): the access pattern looks like the lower-triangular complex
 * Cholesky step (LAPACK xPOTF2) -- confirm against the build rules.
 *
 * For every j:
 *   - the pivot is Re(a[j,j]) minus the DOTC_K product of the j entries
 *     starting at a + j*2 with stride lda (taken with themselves);
 *   - its square root is stored on the diagonal, imaginary part set to ZERO;
 *   - the n-j-1 entries below the diagonal in the current column are
 *     updated through GEMV_O (alpha = dm1) and then scaled by 1/pivot.
 *
 * range_n, when non-NULL, restricts the work to the diagonal sub-block
 * [range_n[0], range_n[1]).  range_m, sa and myid are not used here; sb is
 * handed to GEMV_O as its last argument.
 *
 * Returns 0 when every pivot was strictly positive.  Otherwise returns
 * j + 1 for the first pivot that is not; that raw pivot value is left on
 * the diagonal (imaginary part ZERO) and the remaining columns are not
 * touched.
 */
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {

  BLASLONG n, lda;
  FLOAT *a;
  FLOAT ajj[2];
  FLOAT *aoffset;
  BLASLONG i, j;

  n   = args -> n;
  a   = (FLOAT *)args -> a;
  lda = args -> lda;

  if (range_n) {
    n  = range_n[1] - range_n[0];
    /* Step to the diagonal element (range_n[0], range_n[0]). */
    a += range_n[0] * (lda + 1) * COMPSIZE;
  }

  /* aoffset tracks the start of column j; a stays at column 0. */
  aoffset = a;

  for (j = 0; j < n; j++) {

    ajj[0] = DOTC_K(j, a + j * 2, lda, a + j * 2, lda);
    /* Must follow the dot call directly; ajj[1] itself is not used below. */
    GET_IMAGE(ajj[1]);

    ajj[0] = *(aoffset + j * 2) - ajj[0];

    /*
     * Written as !(x > 0) rather than (x <= 0): every ordered comparison
     * with NaN is false, so the old test let a NaN pivot fall through to
     * SQRT and reported success.  This form rejects NaN as well.
     */
    if (!(ajj[0] > 0)) {
      *(aoffset + j * 2 + 0) = ajj[0];
      *(aoffset + j * 2 + 1) = ZERO;
      return j + 1;
    }

    ajj[0] = SQRT(ajj[0]);
    *(aoffset + j * 2 + 0) = ajj[0];
    *(aoffset + j * 2 + 1) = ZERO;

    /* Number of entries below the diagonal in this column. */
    i = n - j - 1;

    if (i > 0) {
      GEMV_O(i, j, 0, dm1, ZERO,
             a + (j + 1) * 2, lda,
             a + j * 2, lda,
             aoffset + (j + 1) * 2, 1, sb);

      SCAL_K(i, 0, 0, ONE / ajj[0], ZERO,
             aoffset + (j + 1) * 2, 1, NULL, 0, NULL, 0);
    }

    aoffset += lda * 2;
  }

  return 0;
}
/*
 * Unblocked in-place update of the complex matrix args->a (interleaved
 * real/imaginary FLOATs, leading dimension args->lda), one diagonal index
 * at a time.
 * NOTE(review): the pattern resembles LAPACK xLAUU2 for a lower-triangular
 * factor -- confirm against the build rules.
 *
 * For each index c:
 *   1. the first c+1 entries of row c (stride lda) are scaled by the real
 *      part of the diagonal entry via SCAL_K;
 *   2. if entries remain below the diagonal, their DOTC_K product with
 *      themselves is added to the real part of the diagonal, the imaginary
 *      part is set to ZERO, and GEMV_U (alpha = dp1) updates the first c
 *      entries of row c.
 *
 * range_n, when non-NULL, restricts the work to the diagonal sub-block
 * [range_n[0], range_n[1]).  range_m, sa and myid are unused; sb is passed
 * to GEMV_U as its last argument.  Always returns 0.
 */
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {

  BLASLONG order = args -> n;
  BLASLONG ld    = args -> lda;
  FLOAT   *mat   = (FLOAT *)args -> a;
  FLOAT    dot[2];
  BLASLONG c;

  if (range_n) {
    order = range_n[1] - range_n[0];
    mat  += range_n[0] * (ld + 1) * COMPSIZE;
  }

  for (c = 0; c < order; c++) {
    FLOAT   *diag  = mat + (c + c * ld) * COMPSIZE;  /* element (c, c)      */
    FLOAT   *row   = mat + c * COMPSIZE;             /* element (c, 0)      */
    BLASLONG below = order - c - 1;                  /* entries under diag  */

    SCAL_K(c + 1, 0, 0, diag[0], ZERO, row, ld, NULL, 0, NULL, 0);

    if (below <= 0) continue;

    dot[0] = DOTC_K(below, diag + COMPSIZE, 1, diag + COMPSIZE, 1);
    /* Keep directly after the dot call; dot[1] is not used afterwards. */
    GET_IMAGE(dot[1]);

    diag[0] += dot[0];
    diag[1]  = ZERO;

    GEMV_U(below, c, 0, dp1, ZERO,
           row + COMPSIZE, ld,
           diag + COMPSIZE, 1,
           row, ld, sb);
  }

  return 0;
}
/*
 * Blocked in-place triangular solve on the complex vector b (m entries,
 * interleaved real/imaginary FLOATs) against the matrix a with leading
 * dimension lda.  The variant is chosen at compile time:
 *   TRANSA == 1 / 3 : AXPYU_K / AXPYC_K inside a block, GEMV_N / GEMV_R
 *                     after it;
 *   TRANSA == 2 / 4 : GEMV_T / GEMV_C before a block, DOTU_K / DOTC_K
 *                     inside it;
 *   UNIT defined    : the division by the diagonal entry is skipped.
 *
 * Blocks of at most DTB_ENTRIES indices are processed from index m down
 * to 0.  When incb != 1 the vector is first packed into buffer, the GEMV
 * scratch area is placed at the next 4096-byte boundary behind the packed
 * copy, and the result is copied back at the end.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG blk_end, blk_begin, blk_len, row;
#if (TRANSA == 2) || (TRANSA == 4)
  FLOAT _Complex dot;
#endif
#ifndef UNIT
  FLOAT ar, ai, br, bi, ratio, den;
#endif

  FLOAT *work = (FLOAT *)buffer;
  FLOAT *x    = b;

  if (incb != 1) {
    x    = buffer;
    work = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  for (blk_end = m; blk_end > 0; blk_end -= DTB_ENTRIES) {

    blk_len   = MIN(blk_end, DTB_ENTRIES);
    blk_begin = blk_end - blk_len;

    /* Subtract the contribution of the entries at indices >= blk_end. */
#if TRANSA == 2
    if (m - blk_end > 0) {
      GEMV_T(m - blk_end, blk_len, 0, dm1, ZERO,
             a + (blk_end + blk_begin * lda) * COMPSIZE, lda,
             x + blk_end   * COMPSIZE, 1,
             x + blk_begin * COMPSIZE, 1, work);
    }
#elif TRANSA == 4
    if (m - blk_end > 0) {
      GEMV_C(m - blk_end, blk_len, 0, dm1, ZERO,
             a + (blk_end + blk_begin * lda) * COMPSIZE, lda,
             x + blk_end   * COMPSIZE, 1,
             x + blk_begin * COMPSIZE, 1, work);
    }
#endif

    /* Walk the block from its last index down to its first. */
    for (row = blk_end - 1; row >= blk_begin; row--) {
      FLOAT *diag = a + (row + row * lda) * COMPSIZE;
      FLOAT *xr   = x + row * COMPSIZE;
#if (TRANSA == 2) || (TRANSA == 4)
      BLASLONG solved = blk_end - 1 - row;   /* block entries already done */
#endif
#if (TRANSA == 1) || (TRANSA == 3)
      BLASLONG pending = row - blk_begin;    /* block entries still to do  */
#endif

#if (TRANSA == 2) || (TRANSA == 4)
      if (solved > 0) {
#if TRANSA == 2
        dot = DOTU_K(solved, diag + 2, 1, xr + 2, 1);
#else
        dot = DOTC_K(solved, diag + 2, 1, xr + 2, 1);
#endif
        xr[0] -= CREAL(dot);
        xr[1] -= CIMAG(dot);
      }
#endif

#ifndef UNIT
      /*
       * Reciprocal of the diagonal entry, dividing by the component of
       * larger magnitude.  For TRANSA >= 3 the sign of the imaginary part
       * is flipped relative to TRANSA < 3.
       */
      ar = diag[0];
      ai = diag[1];

      if (fabs(ar) >= fabs(ai)){
        ratio = ai / ar;
        den   = 1./(ar * ( 1 + ratio * ratio));
        ar    = den;
#if TRANSA < 3
        ai    = -ratio * den;
#else
        ai    =  ratio * den;
#endif
      } else {
        ratio = ar / ai;
        den   = 1./(ai * ( 1 + ratio * ratio));
        ar    = ratio * den;
#if TRANSA < 3
        ai    = -den;
#else
        ai    =  den;
#endif
      }

      br = xr[0];
      bi = xr[1];

      xr[0] = ar*br - ai*bi;
      xr[1] = ar*bi + ai*br;
#endif

#if (TRANSA == 1) || (TRANSA == 3)
      if (pending > 0) {
#if TRANSA == 1
        AXPYU_K(pending, 0, 0, - xr[0], -xr[1],
                diag - pending * COMPSIZE, 1,
                xr   - pending * COMPSIZE, 1, NULL, 0);
#else
        AXPYC_K(pending, 0, 0, - xr[0], -xr[1],
                diag - pending * COMPSIZE, 1,
                xr   - pending * COMPSIZE, 1, NULL, 0);
#endif
      }
#endif
    }

    /* Push the finished block into the entries at indices < blk_begin. */
#if TRANSA == 1
    if (blk_begin > 0) {
      GEMV_N(blk_begin, blk_len, 0, dm1, ZERO,
             a + blk_begin * lda * COMPSIZE, lda,
             x + blk_begin * COMPSIZE, 1,
             x, 1, work);
    }
#elif TRANSA == 3
    if (blk_begin > 0) {
      GEMV_R(blk_begin, blk_len, 0, dm1, ZERO,
             a + blk_begin * lda * COMPSIZE, lda,
             x + blk_begin * COMPSIZE, 1,
             x, 1, work);
    }
#endif
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
/*
 * Blocked in-place triangular matrix-vector product on the complex vector
 * b (m entries, interleaved real/imaginary FLOATs) with the matrix a of
 * leading dimension lda.  The variant is chosen at compile time:
 *   TRANSA == 1 / 3 : GEMV_N / GEMV_R before a block, AXPYU_K / AXPYC_K
 *                     inside it;
 *   TRANSA == 2 / 4 : DOTU_K / DOTC_K inside a block, GEMV_T / GEMV_C
 *                     after it;
 *   UNIT defined    : the multiplication by the diagonal entry is skipped.
 *
 * Blocks of at most DTB_ENTRIES indices are processed from index 0 up to
 * m.  When incb != 1 the vector is first packed into buffer, the GEMV
 * scratch area is placed at the next 4096-byte boundary behind the packed
 * copy, and the result is copied back at the end.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *buffer) {

  BLASLONG k, start, width;
#if (TRANSA == 2) || (TRANSA == 4)
  OPENBLAS_COMPLEX_FLOAT acc;
#endif
#ifndef UNIT
  FLOAT d_re, d_im, x_re, x_im;
#endif

  FLOAT *work = (FLOAT *)buffer;
  FLOAT *x    = b;

  if (incb != 1) {
    x    = buffer;
    work = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  for (start = 0; start < m; start += DTB_ENTRIES) {
    FLOAT *xblk;   /* first vector entry of the current block */

    width = MIN(m - start, DTB_ENTRIES);
    xblk  = x + start * 2;

    /* Add this block's contribution to the entries at indices < start. */
#if TRANSA == 1
    if (start > 0) {
      GEMV_N(start, width, 0, dp1, ZERO,
             a + start * lda * 2, lda,
             xblk, 1,
             x, 1, work);
    }
#elif TRANSA == 3
    if (start > 0) {
      GEMV_R(start, width, 0, dp1, ZERO,
             a + start * lda * 2, lda,
             xblk, 1,
             x, 1, work);
    }
#endif

    for (k = 0; k < width; k++) {
      FLOAT *colp = a + (start + (k + start) * lda) * 2;  /* (start, start + k) */
      FLOAT *xk   = xblk + k * 2;
#ifndef UNIT
      FLOAT *diag = colp + k * 2;
#endif

#if TRANSA == 1
      if (k > 0) AXPYU_K(k, 0, 0, xk[0], xk[1], colp, 1, xblk, 1, NULL, 0);
#elif TRANSA == 3
      if (k > 0) AXPYC_K(k, 0, 0, xk[0], xk[1], colp, 1, xblk, 1, NULL, 0);
#endif

#ifndef UNIT
      d_re = diag[0];
      d_im = diag[1];

      x_re = xk[0];
      x_im = xk[1];

      /* TRANSA 1/2 multiply by the diagonal entry, 3/4 by its conjugate. */
#if (TRANSA == 1) || (TRANSA == 2)
      xk[0] = d_re * x_re - d_im * x_im;
      xk[1] = d_re * x_im + d_im * x_re;
#else
      xk[0] = d_re * x_re + d_im * x_im;
      xk[1] = d_re * x_im - d_im * x_re;
#endif
#endif

#if (TRANSA == 2) || (TRANSA == 4)
      if (k < width - 1) {
#if TRANSA == 2
        acc = DOTU_K(width - k - 1,
                     colp + (k + 1) * 2, 1,
                     xblk + (k + 1) * 2, 1);
#else
        acc = DOTC_K(width - k - 1,
                     colp + (k + 1) * 2, 1,
                     xblk + (k + 1) * 2, 1);
#endif
        xk[0] += CREAL(acc);
        xk[1] += CIMAG(acc);
      }
#endif
    }

    /* Add the contribution of the entries behind this block. */
#if TRANSA == 2
    if (m - start > width) {
      GEMV_T(m - start - width, width, 0, dp1, ZERO,
             a + (start + width + start * lda) * 2, lda,
             x + (start + width) * 2, 1,
             xblk, 1, work);
    }
#elif TRANSA == 4
    if (m - start > width) {
      GEMV_C(m - start - width, width, 0, dp1, ZERO,
             a + (start + width + start * lda) * 2, lda,
             x + (start + width) * 2, 1,
             xblk, 1, work);
    }
#endif
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
/*
 * In-place triangular matrix-vector product on the complex vector b
 * (m entries, interleaved real/imaginary FLOATs) with a matrix held
 * contiguously in a (no leading dimension).
 * NOTE(review): the pointer stepping matches packed triangular storage
 * with m*(m+1)/2 complex entries -- confirm against the caller.
 *
 * The vector is processed from its last entry to its first; `a` starts at
 * FLOAT offset (m + 1) * m - 2 and is moved backwards after every step.
 * The variant is chosen at compile time:
 *   TRANSA == 1 / 3 : AXPYU_K / AXPYC_K update of the entries behind the
 *                     current one, then `a` steps back by i + 2 entries;
 *   TRANSA == 2 / 4 : DOTU_K / DOTC_K with the entries in front of the
 *                     current one, then `a` steps back by m - i entries;
 *   UNIT defined    : the multiplication by the diagonal entry is skipped.
 *
 * When incb != 1 the vector is packed into buffer first and copied back
 * at the end.  No other scratch space is needed, so the GEMV work pointer
 * the routine used to derive from buffer (and never read) has been
 * dropped.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG i;
#if (TRANSA == 2) || (TRANSA == 4)
  FLOAT _Complex temp;
#endif
#ifndef UNIT
  FLOAT atemp1, atemp2, btemp1, btemp2;
#endif

  FLOAT *B = b;
  FLOAT *Bi;   /* vector entry handled in the current step */

  if (incb != 1) {
    B = buffer;
    COPY_K(m, b, incb, buffer, 1);
  }

  a += (m + 1) * m - 2;

  for (i = 0; i < m; i++) {

    Bi = B + (m - i - 1) * 2;

#if TRANSA == 1
    if (i > 0) AXPYU_K(i, 0, 0, Bi[0], Bi[1], a + 2, 1, Bi + 2, 1, NULL, 0);
#elif TRANSA == 3
    if (i > 0) AXPYC_K(i, 0, 0, Bi[0], Bi[1], a + 2, 1, Bi + 2, 1, NULL, 0);
#endif

#ifndef UNIT
    atemp1 = a[0];
    atemp2 = a[1];

    btemp1 = Bi[0];
    btemp2 = Bi[1];

    /* TRANSA 1/2 multiply by the diagonal entry, 3/4 by its conjugate. */
#if (TRANSA == 1) || (TRANSA == 2)
    Bi[0] = atemp1 * btemp1 - atemp2 * btemp2;
    Bi[1] = atemp1 * btemp2 + atemp2 * btemp1;
#else
    Bi[0] = atemp1 * btemp1 + atemp2 * btemp2;
    Bi[1] = atemp1 * btemp2 - atemp2 * btemp1;
#endif
#endif

#if (TRANSA == 2) || (TRANSA == 4)
    if (i < m - 1) {
#if TRANSA == 2
      temp = DOTU_K(m - i - 1, a - (m - i - 1) * 2, 1, B, 1);
#else
      temp = DOTC_K(m - i - 1, a - (m - i - 1) * 2, 1, B, 1);
#endif
      Bi[0] += CREAL(temp);
      Bi[1] += CIMAG(temp);
    }
#endif

#if (TRANSA == 1) || (TRANSA == 3)
    a -= (i + 2) * 2;
#else
    a -= (m - i) * 2;
#endif
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
/*
 * Accumulates alpha * (matrix * x) into the complex vector y, where the
 * matrix is held contiguously in a (no leading dimension) and only the
 * real part of each diagonal entry is used.
 * NOTE(review): the pointer stepping matches a packed Hermitian matrix
 * (xHPMV) -- confirm against the caller.
 *
 * Compile-time variants:
 *   LOWER   not defined : step i works on the i entries in front of the
 *                         diagonal and then advances `a` by i + 1 entries;
 *   LOWER   defined     : step i works on the m - i - 1 entries behind the
 *                         diagonal and advances `a` by m - i - 1 entries;
 *   HEMVREV not defined : DOTC_K for the dot product, AXPYU_K for the
 *                         update;
 *   HEMVREV defined     : DOTU_K and AXPYC_K instead.
 *
 * Each step performs, in this order: the dot product accumulated into
 * y[i], the real-diagonal term accumulated into y[i], and the AXPY update
 * of the off-diagonal part of y.
 *
 * Vectors with a non-unit increment are packed into buffer first (y at the
 * start, x at the next 4096-byte boundary behind it when y was packed);
 * y is copied back at the end.  The four variants used to be four
 * copy-pasted bodies and the routine kept storing a GEMV work pointer that
 * was never read; both are gone, the arithmetic and its order are the
 * same.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, void *buffer){

  BLASLONG i;
  BLASLONG len;   /* number of off-diagonal entries handled in this step  */
  BLASLONG off;   /* FLOAT offset of those entries within a, X and Y      */

  FLOAT *X = x;
  FLOAT *Y = y;
  FLOAT *bufferY = (FLOAT *)buffer;
  FLOAT *bufferX = bufferY;
  FLOAT temp[2];

  if (incy != 1) {
    Y = bufferY;
    bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, y, incy, Y, 1);
  }

  if (incx != 1) {
    X = bufferX;
    COPY_K(m, x, incx, X, 1);
  }

  for (i = 0; i < m; i++) {

#ifndef LOWER
    len = i;
    off = 0;
#else
    len = m - i - 1;
    off = (i + 1) * 2;
#endif

    if (len > 0) {
#ifndef HEMVREV
      FLOAT _Complex result = DOTC_K(len, a + off, 1, X + off, 1);
#else
      FLOAT _Complex result = DOTU_K(len, a + off, 1, X + off, 1);
#endif

      Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result);
      Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result);
    }

    /* Diagonal term: only the real part a[i * 2 + 0] is read. */
    temp[0] = a[i * 2 + 0] * X[i * 2 + 0];
    temp[1] = a[i * 2 + 0] * X[i * 2 + 1];

    Y[i * 2 + 0] += alpha_r * temp[0] - alpha_i * temp[1];
    Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0];

    if (len > 0) {
#ifndef HEMVREV
      AXPYU_K(len, 0, 0,
              alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
              alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
              a + off, 1, Y + off, 1, NULL, 0);
#else
      AXPYC_K(len, 0, 0,
              alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
              alpha_r * X[i * 2 + 1] + alpha_i * X[i * 2 + 0],
              a + off, 1, Y + off, 1, NULL, 0);
#endif
    }

#ifndef LOWER
    a += (i + 1) * 2;
#else
    a += (m - i - 1) * 2;
#endif
  }

  if (incy != 1) {
    COPY_K(m, Y, 1, y, incy);
  }

  return 0;
}
/*
 * In-place triangular band matrix-vector product on the complex vector b
 * (n entries, interleaved real/imaginary FLOATs).  `a` is advanced by lda
 * complex entries per step, and at most k off-diagonal entries are used in
 * each step.
 * NOTE(review): this matches BLAS band storage with k off-diagonals
 * (xTBMV) -- confirm against the caller.
 *
 * Compile-time variants:
 *   TRANSA == 1 / 3 : AXPYU_K / AXPYC_K updates up to k entries in front
 *                     of entry i; the diagonal entry is read from a[k];
 *   TRANSA == 2 / 4 : DOTU_K / DOTC_K with up to k entries behind entry i
 *                     is added to it; the diagonal entry is read from a[0];
 *   UNIT defined    : the multiplication by the diagonal entry is skipped.
 *
 * When incb != 1 the vector is packed into buffer first and copied back
 * at the end.  No other scratch space is needed, so the GEMV work pointer
 * the routine used to derive from buffer (and never read) has been
 * dropped.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG i;
  BLASLONG length;
  FLOAT *B = b;
  FLOAT *Bi;   /* vector entry handled in the current step */
#if (TRANSA == 2) || (TRANSA == 4)
  OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
  FLOAT atemp1, atemp2, btemp1, btemp2;
#endif

  if (incb != 1) {
    B = buffer;
    COPY_K(n, b, incb, buffer, 1);
  }

  for (i = 0; i < n; i++) {

    Bi = B + i * 2;

#if (TRANSA == 1) || (TRANSA == 3)
    /* Entries in front of i, capped at k. */
    length = i;
    if (length > k) length = k;

    if (length > 0) {
#if TRANSA == 1
      AXPYU_K(length, 0, 0, Bi[0], Bi[1],
              a + (k - length) * COMPSIZE, 1,
              B + (i - length) * COMPSIZE, 1, NULL, 0);
#else
      AXPYC_K(length, 0, 0, Bi[0], Bi[1],
              a + (k - length) * COMPSIZE, 1,
              B + (i - length) * COMPSIZE, 1, NULL, 0);
#endif
    }
#endif

#ifndef UNIT
#if (TRANSA == 1) || (TRANSA == 3)
    atemp1 = a[k * 2 + 0];
    atemp2 = a[k * 2 + 1];
#else
    atemp1 = a[0];
    atemp2 = a[1];
#endif

    btemp1 = Bi[0];
    btemp2 = Bi[1];

    /* TRANSA 1/2 multiply by the diagonal entry, 3/4 by its conjugate. */
#if (TRANSA == 1) || (TRANSA == 2)
    Bi[0] = atemp1 * btemp1 - atemp2 * btemp2;
    Bi[1] = atemp1 * btemp2 + atemp2 * btemp1;
#else
    Bi[0] = atemp1 * btemp1 + atemp2 * btemp2;
    Bi[1] = atemp1 * btemp2 - atemp2 * btemp1;
#endif
#endif

#if (TRANSA == 2) || (TRANSA == 4)
    /* Entries behind i, capped at k. */
    length = n - i - 1;
    if (length > k) length = k;

    if (length > 0) {
#if TRANSA == 2
      temp = DOTU_K(length, a + COMPSIZE, 1, B + (i + 1) * COMPSIZE, 1);
#else
      temp = DOTC_K(length, a + COMPSIZE, 1, B + (i + 1) * COMPSIZE, 1);
#endif

      Bi[0] += CREAL(temp);
      Bi[1] += CIMAG(temp);
    }
#endif

    a += lda * COMPSIZE;
  }

  if (incb != 1) {
    COPY_K(n, buffer, 1, b, incb);
  }

  return 0;
}
/*
 * In-place triangular band solve on the complex vector b (n entries,
 * interleaved real/imaginary FLOATs).  `a` is advanced by lda complex
 * entries per step, and at most k off-diagonal entries are used in each
 * step.
 * NOTE(review): this matches BLAS band storage with k off-diagonals
 * (xTBSV) -- confirm against the caller.
 *
 * Compile-time variants:
 *   TRANSA == 2 / 4 : DOTU_K / DOTC_K with up to k entries in front of
 *                     entry c is subtracted from it; the diagonal entry is
 *                     read from a[k];
 *   TRANSA == 1 / 3 : after the division, AXPYU_K / AXPYC_K subtracts the
 *                     solved entry times the column from up to k entries
 *                     behind it; the diagonal entry is read from a[0];
 *   UNIT defined    : the division by the diagonal entry is skipped.
 *
 * When incb != 1 the vector is packed into buffer first and copied back
 * at the end.  Always returns 0.
 */
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG c, span;
  FLOAT *x = b;
  FLOAT *xc;   /* vector entry handled in the current step */
#if (TRANSA == 2) || (TRANSA == 4)
  OPENBLAS_COMPLEX_FLOAT dot;
#endif
#ifndef UNIT
  FLOAT ar, ai, br, bi, ratio, den;
#endif

  if (incb != 1) {
    x = buffer;
    COPY_K(n, b, incb, buffer, 1);
  }

  for (c = 0; c < n; c++) {

    xc = x + c * 2;

#if (TRANSA == 2) || (TRANSA == 4)
    /* Entries in front of c, capped at k. */
    span = (c > k) ? k : c;

    if (span > 0) {
#if TRANSA == 2
      dot = DOTU_K(span, a + (k - span) * COMPSIZE, 1, x + (c - span) * COMPSIZE, 1);
#else
      dot = DOTC_K(span, a + (k - span) * COMPSIZE, 1, x + (c - span) * COMPSIZE, 1);
#endif

      xc[0] -= CREAL(dot);
      xc[1] -= CIMAG(dot);
    }
#endif

#ifndef UNIT
#if (TRANSA == 1) || (TRANSA == 3)
    ar = a[0];
    ai = a[1];
#else
    ar = a[k * 2 + 0];
    ai = a[k * 2 + 1];
#endif

    /*
     * Reciprocal of the diagonal entry, dividing by the component of
     * larger magnitude.  For TRANSA >= 3 the sign of the imaginary part
     * is flipped relative to TRANSA < 3.
     */
    if (fabs(ar) >= fabs(ai)){
      ratio = ai / ar;
      den   = 1./(ar * ( 1 + ratio * ratio));
      ar    = den;
#if TRANSA < 3
      ai    = -ratio * den;
#else
      ai    =  ratio * den;
#endif
    } else {
      ratio = ar / ai;
      den   = 1./(ai * ( 1 + ratio * ratio));
      ar    = ratio * den;
#if TRANSA < 3
      ai    = -den;
#else
      ai    =  den;
#endif
    }

    br = xc[0];
    bi = xc[1];

    xc[0] = ar*br - ai*bi;
    xc[1] = ar*bi + ai*br;
#endif

#if (TRANSA == 1) || (TRANSA == 3)
    /* Entries behind c, capped at k. */
    span = n - c - 1;
    if (span > k) span = k;

    if (span > 0) {
#if TRANSA == 1
      AXPYU_K(span, 0, 0, -xc[0], -xc[1],
              a + COMPSIZE, 1, x + (c + 1) * COMPSIZE, 1, NULL, 0);
#else
      AXPYC_K(span, 0, 0, -xc[0], -xc[1],
              a + COMPSIZE, 1, x + (c + 1) * COMPSIZE, 1, NULL, 0);
#endif
    }
#endif

    a += lda * COMPSIZE;
  }

  if (incb != 1) {
    COPY_K(n, buffer, 1, b, incb);
  }

  return 0;
}