static void BLAS_csy_norm(enum blas_order_type order, enum blas_norm_type norm, enum blas_uplo_type uplo, int n, const PLASMA_Complex32_t *a, int lda, float *res) { int i, j; float anorm, v; char rname[] = "BLAS_csy_norm"; if (order != blas_colmajor) BLAS_error( rname, -1, order, 0 ); if (norm == blas_inf_norm) { anorm = 0.0; if (blas_upper == uplo) { for (i = 0; i < n; ++i) { v = 0.0; for (j = 0; j < i; ++j) { v += cabsf( a[j + i * lda] ); } for (j = i; j < n; ++j) { v += cabsf( a[i + j * lda] ); } if (v > anorm) anorm = v; } } else { BLAS_error( rname, -3, norm, 0 ); return; } } else { BLAS_error( rname, -2, norm, 0 ); return; } if (res) *res = anorm; }
static double BLAS_dfpinfo(enum blas_cmach_type cmach) { double eps = 1.0, r = 1.0, o = 1.0, b = 2.0; int t = 53, l = 1024, m = -1021; char rname[] = "BLAS_dfpinfo"; if ((sizeof eps) == sizeof(float)) { t = 24; l = 128; m = -125; } else { t = 53; l = 1024; m = -1021; } /* for (i = 0; i < t; ++i) eps *= half; */ eps = BLAS_dpow_di( b, -t ); /* for (i = 0; i >= m; --i) r *= half; */ r = BLAS_dpow_di( b, m-1 ); o -= eps; /* for (i = 0; i < l; ++i) o *= b; */ o = (o * BLAS_dpow_di( b, l-1 )) * b; switch (cmach) { case blas_eps: return eps; case blas_sfmin: return r; default: BLAS_error( rname, -1, cmach, 0 ); break; } return 0.0; }
static void BLAS_zge_norm(enum blas_order_type order, enum blas_norm_type norm, int m, int n, const PLASMA_Complex64_t *a, int lda, double *res) { int i, j; float anorm, v; char rname[] = "BLAS_zge_norm"; if (order != blas_colmajor) BLAS_error( rname, -1, order, 0 ); if (norm == blas_frobenius_norm) { anorm = 0.0f; for (j = n; j; --j) { for (i = m; i; --i) { v = a[0]; anorm += v * v; a++; } a += lda - m; } anorm = sqrt( anorm ); } else if (norm == blas_inf_norm) { anorm = 0.0f; for (i = 0; i < m; ++i) { v = 0.0f; for (j = 0; j < n; ++j) { v += cabs( a[i + j * lda] ); } if (v > anorm) anorm = v; } } else { BLAS_error( rname, -2, norm, 0 ); return; } if (res) *res = anorm; }
void BLAS_ztpmv_d(enum blas_order_type order, enum blas_uplo_type uplo, enum blas_trans_type trans, enum blas_diag_type diag, int n, const void *alpha, const double *tp, void *x, int incx) /* * Purpose * ======= * * Computes x = alpha * tp * x, x = alpha * tp_transpose * x, * or x = alpha * tp_conjugate_transpose where tp is a triangular * packed matrix. * * Arguments * ========= * * order (input) blas_order_type * Order of tp; row or column major * * uplo (input) blas_uplo_type * Whether tp is upper or lower * * trans (input) blas_trans_type * * diag (input) blas_diag_type * Whether the diagonal entries of tp are 1 * * n (input) int * Dimension of tp and the length of vector x * * alpha (input) const void* * * tp (input) double* * * x (input) void* * * incx (input) int * The stride for vector x. * */ { static const char routine_name[] = "BLAS_ztpmv_d"; { int matrix_row, step, tp_index, tp_start, x_index, x_start; int inctp, x_index2, stride, col_index, inctp2; double *alpha_i = (double *) alpha; const double *tp_i = tp; double *x_i = (double *) x; double rowsum[2]; double rowtmp[2]; double result[2]; double matval; double vecval[2]; double one; one = 1.0; inctp = 1; incx *= 2; if (incx < 0) x_start = (-n + 1) * incx; else x_start = 0; if (n < 1) { return; } /* Check for error conditions. */ if (order != blas_colmajor && order != blas_rowmajor) { BLAS_error(routine_name, -1, order, NULL); } if (uplo != blas_upper && uplo != blas_lower) { BLAS_error(routine_name, -2, uplo, NULL); } if (incx == 0) { BLAS_error(routine_name, -9, incx, NULL); } { if ((uplo == blas_upper && trans == blas_no_trans && order == blas_rowmajor) || (uplo == blas_lower && trans != blas_no_trans && order == blas_colmajor)) { tp_start = 0; tp_index = tp_start; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start + incx * matrix_row; x_index2 = x_index; col_index = matrix_row; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; result[0] = result[1] = 0.0; while (col_index < n) { vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; if ((diag == blas_unit_diag) && (col_index == matrix_row)) { { rowtmp[0] = vecval[0] * one; rowtmp[1] = vecval[1] * one; } } else { matval = tp_i[tp_index]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; x_index += incx; tp_index += inctp; col_index++; } { result[0] = (double) rowsum[0] * alpha_i[0] - (double) rowsum[1] * alpha_i[1]; result[1] = (double) rowsum[0] * alpha_i[1] + (double) rowsum[1] * alpha_i[0]; } x_i[x_index2] = result[0]; x_i[x_index2 + 1] = result[1]; } } else if ((uplo == blas_upper && trans == blas_no_trans && order == blas_colmajor) || (uplo == blas_lower && trans != blas_no_trans && order == blas_rowmajor)) { tp_start = ((n - 1) * n) / 2; inctp2 = n - 1; x_index2 = x_start; for (matrix_row = 0; matrix_row < n; matrix_row++, inctp2 = n - 1) { x_index = x_start + incx * (n - 1); tp_index = (tp_start + matrix_row) * inctp; col_index = (n - 1) - matrix_row; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; result[0] = result[1] = 0.0; while (col_index >= 0) { vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; if ((diag == blas_unit_diag) && (col_index == 0)) { { rowtmp[0] = vecval[0] * one; rowtmp[1] = vecval[1] * one; } } else { matval = tp_i[tp_index]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; x_index -= incx; tp_index -= inctp2 * inctp; inctp2--; col_index--; } { result[0] = (double) rowsum[0] * alpha_i[0] - (double) rowsum[1] * alpha_i[1]; result[1] = (double) rowsum[0] * alpha_i[1] + (double) rowsum[1] * alpha_i[0]; } x_i[x_index2] = result[0]; x_i[x_index2 + 1] = result[1]; x_index2 += incx; } } else if ((uplo == blas_lower && trans == blas_no_trans && order == blas_rowmajor) || (uplo == blas_upper && trans != blas_no_trans && order == blas_colmajor)) { tp_start = (n - 1) + ((n - 1) * n) / 2; tp_index = tp_start * inctp; x_index = x_start + (n - 1) * incx; for (matrix_row = n - 1; matrix_row >= 0; matrix_row--) { x_index2 = x_index; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; result[0] = result[1] = 0.0; for (step = 0; step <= matrix_row; step++) { vecval[0] = x_i[x_index2]; vecval[1] = x_i[x_index2 + 1]; if ((diag == blas_unit_diag) && (step == 0)) { { rowtmp[0] = vecval[0] * one; rowtmp[1] = vecval[1] * one; } } else { matval = tp_i[tp_index]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; x_index2 -= incx; tp_index -= inctp; } { result[0] = (double) rowsum[0] * alpha_i[0] - (double) rowsum[1] * alpha_i[1]; result[1] = (double) rowsum[0] * alpha_i[1] + (double) rowsum[1] * alpha_i[0]; } x_i[x_index] = result[0]; x_i[x_index + 1] = result[1]; x_index -= incx; } } else { tp_start = 0; x_index = x_start + (n - 1) * incx; for (matrix_row = n - 1; matrix_row >= 0; matrix_row--) { tp_index = matrix_row * inctp; x_index2 = x_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; result[0] = result[1] = 0.0; stride = n; for (step = 0; step <= matrix_row; step++) { vecval[0] = x_i[x_index2]; vecval[1] = x_i[x_index2 + 1]; if ((diag == blas_unit_diag) && (step == matrix_row)) { { rowtmp[0] = vecval[0] * one; rowtmp[1] = vecval[1] * one; } } else { matval = tp_i[tp_index]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; stride--; tp_index += stride * inctp; x_index2 += incx; } { result[0] = (double) rowsum[0] * alpha_i[0] - (double) rowsum[1] * alpha_i[1]; result[1] = (double) rowsum[0] * alpha_i[1] + (double) rowsum[1] * alpha_i[0]; } x_i[x_index] = result[0]; x_i[x_index + 1] = result[1]; x_index -= incx; } } } } }
void BLAS_cdot_s_s_x(enum blas_conj_type conj, int n, const void *alpha, const float *x, int incx, const void *beta, const float *y, int incy, void *r, enum blas_prec_type prec) /* * Purpose * ======= * * This routine computes the inner product: * * r <- beta * r + alpha * SUM_{i=0, n-1} x[i] * y[i]. * * Arguments * ========= * * conj (input) enum blas_conj_type * When x and y are complex vectors, specifies whether vector * components x[i] are used unconjugated or conjugated. * * n (input) int * The length of vectors x and y. * * alpha (input) const void* * * x (input) const float* * Array of length n. * * incx (input) int * The stride used to access components x[i]. * * beta (input) const void* * * y (input) const float* * Array of length n. * * incy (input) int * The stride used to access components y[i]. * * r (input/output) void* * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * */ { static const char routine_name[] = "BLAS_cdot_s_s_x"; switch (prec) { case blas_prec_single:{ int i, ix = 0, iy = 0; float *r_i = (float *) r; const float *x_i = x; const float *y_i = y; float *alpha_i = (float *) alpha; float *beta_i = (float *) beta; float x_ii; float y_ii; float r_v[2]; float prod; float sum; float tmp1[2]; float tmp2[2]; /* Test the input parameters. */ if (n < 0) BLAS_error(routine_name, -2, n, NULL); else if (incx == 0) BLAS_error(routine_name, -5, incx, NULL); else if (incy == 0) BLAS_error(routine_name, -8, incy, NULL); /* Immediate return. */ if (((beta_i[0] == 1.0 && beta_i[1] == 0.0)) && (n == 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0))) return; r_v[0] = r_i[0]; r_v[1] = r_i[0 + 1]; sum = 0.0; if (incx < 0) ix = (-n + 1) * incx; if (incy < 0) iy = (-n + 1) * incy; for (i = 0; i < n; ++i) { x_ii = x_i[ix]; y_ii = y_i[iy]; prod = x_ii * y_ii; /* prod = x[i]*y[i] */ sum = sum + prod; /* sum = sum+prod */ ix += incx; iy += incy; } /* endfor */ { tmp1[0] = alpha_i[0] * sum; tmp1[1] = alpha_i[1] * sum; } /* tmp1 = sum*alpha */ { tmp2[0] = r_v[0] * beta_i[0] - r_v[1] * beta_i[1]; tmp2[1] = r_v[0] * beta_i[1] + r_v[1] * beta_i[0]; } /* tmp2 = r*beta */ tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; /* tmp1 = tmp1+tmp2 */ ((float *) r)[0] = tmp1[0]; ((float *) r)[1] = tmp1[1]; /* r = tmp1 */ break; } case blas_prec_double: case blas_prec_indigenous: { int i, ix = 0, iy = 0; float *r_i = (float *) r; const float *x_i = x; const float *y_i = y; float *alpha_i = (float *) alpha; float *beta_i = (float *) beta; float x_ii; float y_ii; float r_v[2]; double prod; double sum; double tmp1[2]; double tmp2[2]; /* Test the input parameters. */ if (n < 0) BLAS_error(routine_name, -2, n, NULL); else if (incx == 0) BLAS_error(routine_name, -5, incx, NULL); else if (incy == 0) BLAS_error(routine_name, -8, incy, NULL); /* Immediate return. */ if (((beta_i[0] == 1.0 && beta_i[1] == 0.0)) && (n == 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0))) return; r_v[0] = r_i[0]; r_v[1] = r_i[0 + 1]; sum = 0.0; if (incx < 0) ix = (-n + 1) * incx; if (incy < 0) iy = (-n + 1) * incy; for (i = 0; i < n; ++i) { x_ii = x_i[ix]; y_ii = y_i[iy]; prod = (double) x_ii *y_ii; /* prod = x[i]*y[i] */ sum = sum + prod; /* sum = sum+prod */ ix += incx; iy += incy; } /* endfor */ { tmp1[0] = alpha_i[0] * sum; tmp1[1] = alpha_i[1] * sum; } /* tmp1 = sum*alpha */ { tmp2[0] = (double) r_v[0] * beta_i[0] - (double) r_v[1] * beta_i[1]; tmp2[1] = (double) r_v[0] * beta_i[1] + (double) r_v[1] * beta_i[0]; } /* tmp2 = r*beta */ tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; /* tmp1 = tmp1+tmp2 */ ((float *) r)[0] = tmp1[0]; ((float *) r)[1] = tmp1[1]; /* r = tmp1 */ } break; case blas_prec_extra: { int i, ix = 0, iy = 0; float *r_i = (float *) r; const float *x_i = x; const float *y_i = y; float *alpha_i = (float *) alpha; float *beta_i = (float *) beta; float x_ii; float y_ii; float r_v[2]; double head_prod, tail_prod; double head_sum, tail_sum; double head_tmp1[2], tail_tmp1[2]; double head_tmp2[2], tail_tmp2[2]; FPU_FIX_DECL; /* Test the input parameters. */ if (n < 0) BLAS_error(routine_name, -2, n, NULL); else if (incx == 0) BLAS_error(routine_name, -5, incx, NULL); else if (incy == 0) BLAS_error(routine_name, -8, incy, NULL); /* Immediate return. */ if (((beta_i[0] == 1.0 && beta_i[1] == 0.0)) && (n == 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0))) return; FPU_FIX_START; r_v[0] = r_i[0]; r_v[1] = r_i[0 + 1]; head_sum = tail_sum = 0.0; if (incx < 0) ix = (-n + 1) * incx; if (incy < 0) iy = (-n + 1) * incy; for (i = 0; i < n; ++i) { x_ii = x_i[ix]; y_ii = y_i[iy]; head_prod = (double) x_ii *y_ii; tail_prod = 0.0; /* prod = x[i]*y[i] */ { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } /* sum = sum+prod */ ix += incx; iy += incy; } /* endfor */ { double head_e1, tail_e1; double dt; dt = (double) alpha_i[0]; { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum * split; a11 = con - head_sum; a11 = con - a11; a21 = head_sum - a11; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; c11 = head_sum * dt; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum * dt; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_tmp1[0] = head_e1; tail_tmp1[0] = tail_e1; dt = (double) alpha_i[1]; { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum * split; a11 = con - head_sum; a11 = con - a11; a21 = head_sum - a11; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; c11 = head_sum * dt; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum * dt; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_tmp1[1] = head_e1; tail_tmp1[1] = tail_e1; } /* tmp1 = sum*alpha */ { double head_e1, tail_e1; double d1; double d2; /* Real part */ d1 = (double) r_v[0] * beta_i[0]; d2 = (double) -r_v[1] * beta_i[1]; { /* Compute double-double = double + double. */ double e, t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_tmp2[0] = head_e1; tail_tmp2[0] = tail_e1; /* imaginary part */ d1 = (double) r_v[0] * beta_i[1]; d2 = (double) r_v[1] * beta_i[0]; { /* Compute double-double = double + double. */ double e, t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_tmp2[1] = head_e1; tail_tmp2[1] = tail_e1; } /* tmp2 = r*beta */ { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_tmp1[0]; tail_a = tail_tmp1[0]; head_b = head_tmp2[0]; tail_b = tail_tmp2[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmp1[0] = head_t; tail_tmp1[0] = tail_t; /* Imaginary part */ head_a = head_tmp1[1]; tail_a = tail_tmp1[1]; head_b = head_tmp2[1]; tail_b = tail_tmp2[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmp1[1] = head_t; tail_tmp1[1] = tail_t; } /* tmp1 = tmp1+tmp2 */ ((float *) r)[0] = head_tmp1[0]; ((float *) r)[1] = head_tmp1[1]; /* r = tmp1 */ FPU_FIX_STOP; } break; } }
void BLAS_cge_sum_mv_s_c(enum blas_order_type order, int m, int n, const void *alpha, const float *a, int lda, const void *x, int incx, const void *beta, const float *b, int ldb, void *y, int incy) /* * Purpose * ======= * * Computes y = alpha * A * x + beta * B * y, * where A, B are general matricies. * * Arguments * ========= * * order (input) blas_order_type * Order of A; row or column major * * m (input) int * Row Dimension of A, B, length of output vector y * * n (input) int * Column Dimension of A, B and the length of vector x * * alpha (input) const void* * * A (input) const float* * * lda (input) int * Leading dimension of A * * x (input) const void* * * incx (input) int * The stride for vector x. * * beta (input) const void* * * b (input) const float* * * ldb (input) int * Leading dimension of B * * y (input/output) void* * * incy (input) int * The stride for vector y. * */ { /* Routine name */ static const char routine_name[] = "BLAS_cge_sum_mv_s_c"; int i, j; int xi, yi; int x_starti, y_starti, incxi, incyi; int lda_min; int ai; int incai; int aij; int incaij; int bi; int incbi; int bij; int incbij; const float *a_i = a; const float *b_i = b; const float *x_i = (float *) x; float *y_i = (float *) y; float *alpha_i = (float *) alpha; float *beta_i = (float *) beta; float a_elem; float b_elem; float x_elem[2]; float prod[2]; float sumA[2]; float sumB[2]; float tmp1[2]; float tmp2[2]; /* m is number of rows */ /* n is number of columns */ if (m == 0 || n == 0) return; /* all error calls */ if (order == blas_rowmajor) { lda_min = n; incai = lda; /* row stride */ incbi = ldb; incbij = incaij = 1; /* column stride */ } else if (order == blas_colmajor) { lda_min = m; incai = incbi = 1; /*row stride */ incaij = lda; /* column stride */ incbij = ldb; } else { /* error, order not blas_colmajor not blas_rowmajor */ BLAS_error(routine_name, -1, order, 0); return; } if (m < 0) BLAS_error(routine_name, -2, m, 0); else if (n < 0) BLAS_error(routine_name, -3, n, 0); if (lda < lda_min) BLAS_error(routine_name, -6, lda, 0); else if (ldb < lda_min) BLAS_error(routine_name, -11, ldb, 0); else if (incx == 0) BLAS_error(routine_name, -8, incx, 0); else if (incy == 0) BLAS_error(routine_name, -13, incy, 0); incxi = incx; incyi = incy; incxi *= 2; incyi *= 2; if (incxi > 0) x_starti = 0; else x_starti = (1 - n) * incxi; if (incyi > 0) y_starti = 0; else y_starti = (1 - m) * incyi; if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* alpha, beta are 0.0 */ for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { y_i[yi] = 0.0; y_i[yi + 1] = 0.0; } } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) { /* alpha is 0.0, beta is 1.0 */ bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumB[0] = sumB[1] = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; b_elem = b_i[bij]; { prod[0] = x_elem[0] * b_elem; prod[1] = x_elem[1] * b_elem; } sumB[0] = sumB[0] + prod[0]; sumB[1] = sumB[1] + prod[1]; bij += incbij; } /* now put the result into y_i */ y_i[yi] = sumB[0]; y_i[yi + 1] = sumB[1]; bi += incbi; } } else { /* alpha is 0.0, beta not 1.0 nor 0.0 */ bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumB[0] = sumB[1] = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; b_elem = b_i[bij]; { prod[0] = x_elem[0] * b_elem; prod[1] = x_elem[1] * b_elem; } sumB[0] = sumB[0] + prod[0]; sumB[1] = sumB[1] + prod[1]; bij += incbij; } /* now put the result into y_i */ { tmp1[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1]; tmp1[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0]; } y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; bi += incbi; } } } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* alpha is 1.0, beta is 0.0 */ ai = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA[0] = sumA[1] = 0.0; aij = ai; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; a_elem = a_i[aij]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sumA[0] = sumA[0] + prod[0]; sumA[1] = sumA[1] + prod[1]; aij += incaij; } /* now put the result into y_i */ y_i[yi] = sumA[0]; y_i[yi + 1] = sumA[1]; ai += incai; } } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) { /* alpha is 1.0, beta is 1.0 */ ai = 0; bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA[0] = sumA[1] = 0.0; aij = ai; sumB[0] = sumB[1] = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; a_elem = a_i[aij]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sumA[0] = sumA[0] + prod[0]; sumA[1] = sumA[1] + prod[1]; aij += incaij; b_elem = b_i[bij]; { prod[0] = x_elem[0] * b_elem; prod[1] = x_elem[1] * b_elem; } sumB[0] = sumB[0] + prod[0]; sumB[1] = sumB[1] + prod[1]; bij += incbij; } /* now put the result into y_i */ tmp1[0] = sumA[0]; tmp1[1] = sumA[1]; tmp2[0] = sumB[0]; tmp2[1] = sumB[1]; tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; ai += incai; bi += incbi; } } else { /* alpha is 1.0, beta is other */ ai = 0; bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA[0] = sumA[1] = 0.0; aij = ai; sumB[0] = sumB[1] = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; a_elem = a_i[aij]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sumA[0] = sumA[0] + prod[0]; sumA[1] = sumA[1] + prod[1]; aij += incaij; b_elem = b_i[bij]; { prod[0] = x_elem[0] * b_elem; prod[1] = x_elem[1] * b_elem; } sumB[0] = sumB[0] + prod[0]; sumB[1] = sumB[1] + prod[1]; bij += incbij; } /* now put the result into y_i */ tmp1[0] = sumA[0]; tmp1[1] = sumA[1]; { tmp2[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1]; tmp2[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0]; } tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; ai += incai; bi += incbi; } } } else { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* alpha is other, beta is 0.0 */ ai = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA[0] = sumA[1] = 0.0; aij = ai; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; a_elem = a_i[aij]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sumA[0] = sumA[0] + prod[0]; sumA[1] = sumA[1] + prod[1]; aij += incaij; } /* now put the result into y_i */ { tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1]; tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0]; } y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; ai += incai; } } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) { /* alpha is other, beta is 1.0 */ ai = 0; bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA[0] = sumA[1] = 0.0; aij = ai; sumB[0] = sumB[1] = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; a_elem = a_i[aij]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sumA[0] = sumA[0] + prod[0]; sumA[1] = sumA[1] + prod[1]; aij += incaij; b_elem = b_i[bij]; { prod[0] = x_elem[0] * b_elem; prod[1] = x_elem[1] * b_elem; } sumB[0] = sumB[0] + prod[0]; sumB[1] = sumB[1] + prod[1]; bij += incbij; } /* now put the result into y_i */ { tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1]; tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0]; } tmp2[0] = sumB[0]; tmp2[1] = sumB[1]; tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; ai += incai; bi += incbi; } } else { /* most general form, alpha, beta are other */ ai = 0; bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA[0] = sumA[1] = 0.0; aij = ai; sumB[0] = sumB[1] = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; a_elem = a_i[aij]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sumA[0] = sumA[0] + prod[0]; sumA[1] = sumA[1] + prod[1]; aij += incaij; b_elem = b_i[bij]; { prod[0] = x_elem[0] * b_elem; prod[1] = x_elem[1] * b_elem; } sumB[0] = sumB[0] + prod[0]; sumB[1] = sumB[1] + prod[1]; bij += incbij; } /* now put the result into y_i */ { tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1]; tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0]; } { tmp2[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1]; tmp2[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0]; } tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; ai += incai; bi += incbi; } } } } /* end BLAS_cge_sum_mv_s_c */
void BLAS_cgemv2_s_c(enum blas_order_type order, enum blas_trans_type trans, int m, int n, const void *alpha, const float *a, int lda, const void *head_x, const void *tail_x, int incx, const void *beta, void *y, int incy) /* * Purpose * ======= * * Computes y = alpha * op(A) * head_x + alpha * op(A) * tail_x + beta * y, * where A is a general matrix. * * Arguments * ========= * * order (input) blas_order_type * Order of A; row or column major * * trans (input) blas_trans_type * Transpose of A: no trans, trans, or conjugate trans * * m (input) int * Dimension of A * * n (input) int * Dimension of A and the length of vector x and z * * alpha (input) const void* * * A (input) const float* * * lda (input) int * Leading dimension of A * * head_x * tail_x (input) const void* * * incx (input) int * The stride for vector x. * * beta (input) const void* * * y (input) const void* * * incy (input) int * The stride for vector y. * */ { static const char routine_name[] = "BLAS_cgemv2_s_c"; int i, j; int iy, jx, kx, ky; int lenx, leny; int ai, aij; int incai, incaij; const float *a_i = a; const float *head_x_i = (float *) head_x; const float *tail_x_i = (float *) tail_x; float *y_i = (float *) y; float *alpha_i = (float *) alpha; float *beta_i = (float *) beta; float a_elem; float x_elem[2]; float y_elem[2]; float prod[2]; float sum[2]; float sum2[2]; float tmp1[2]; float tmp2[2]; /* all error calls */ if (m < 0) BLAS_error(routine_name, -3, m, 0); else if (n <= 0) BLAS_error(routine_name, -4, n, 0); else if (incx == 0) BLAS_error(routine_name, -10, incx, 0); else if (incy == 0) BLAS_error(routine_name, -13, incy, 0); if ((order == blas_rowmajor) && (trans == blas_no_trans)) { lenx = n; leny = m; incai = lda; incaij = 1; } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) { lenx = m; leny = n; incai = 1; incaij = lda; } else if ((order == blas_colmajor) && (trans == blas_no_trans)) { lenx = n; leny = m; incai = 1; incaij = lda; } else { /* colmajor and blas_trans */ lenx = m; leny = n; incai = lda; incaij = 1; } if (lda < leny) BLAS_error(routine_name, -7, lda, NULL); incx *= 2; incy *= 2; if (incx > 0) kx = 0; else kx = (1 - lenx) * incx; if (incy > 0) ky = 0; else ky = (1 - leny) * incy; /* No extra-precision needed for alpha = 0 */ if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { iy = ky; for (i = 0; i < leny; i++) { y_i[iy] = 0.0; y_i[iy + 1] = 0.0; iy += incy; } } else if (!(beta_i[0] == 0.0 && beta_i[1] == 0.0)) { iy = ky; for (i = 0; i < leny; i++) { y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp1[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1]; tmp1[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0]; } y_i[iy] = tmp1[0]; y_i[iy + 1] = tmp1[1]; iy += incy; } } } else { /* alpha != 0 */ /* if beta = 0, we can save m multiplies: y = alpha*A*head_x + alpha*A*tail_x */ if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { /* save m more multiplies if alpha = 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0; sum2[0] = sum2[1] = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum2[0] = sum2[0] + prod[0]; sum2[1] = sum2[1] + prod[1]; aij += incaij; jx += incx; } sum[0] = sum[0] + sum2[0]; sum[1] = sum[1] + sum2[1]; y_i[iy] = sum[0]; y_i[iy + 1] = sum[1]; ai += incai; iy += incy; } /* end for */ } else { /* alpha != 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0; sum2[0] = sum2[1] = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum2[0] = sum2[0] + prod[0]; sum2[1] = sum2[1] + prod[1]; aij += incaij; jx += incx; } { tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1]; tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0]; } { tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1]; tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0]; } tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_i[iy] = tmp1[0]; y_i[iy + 1] = tmp1[1]; ai += incai; iy += incy; } } } else { /* beta != 0 */ if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { /* save m multiplies if alpha = 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0;; sum2[0] = sum2[1] = 0.0;; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum2[0] = sum2[0] + prod[0]; sum2[1] = sum2[1] + prod[1]; aij += incaij; jx += incx; } sum[0] = sum[0] + sum2[0]; sum[1] = sum[1] + sum2[1]; y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp1[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1]; tmp1[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0]; } tmp2[0] = sum[0] + tmp1[0]; tmp2[1] = sum[1] + tmp1[1]; y_i[iy] = tmp2[0]; y_i[iy + 1] = tmp2[1]; ai += incai; iy += incy; } } else { /* alpha != 1, the most general form: y = alpha*A*head_x + alpha*A*tail_x + beta*y */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0;; sum2[0] = sum2[1] = 0.0;; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum2[0] = sum2[0] + prod[0]; sum2[1] = sum2[1] + prod[1]; aij += incaij; jx += incx; } { tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1]; tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0]; } { tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1]; tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0]; } tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1]; tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0]; } tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_i[iy] = tmp1[0]; y_i[iy + 1] = tmp1[1]; ai += incai; iy += incy; } } } } }
void BLAS_zhbmv_c_z(enum blas_order_type order, enum blas_uplo_type uplo, int n, int k, const void *alpha, const void *a, int lda, const void *x, int incx, const void *beta, void *y, int incy) /* * Purpose * ======= * * This routines computes the matrix product: * * y <- alpha * A * x + beta * y * * where A is a hermitian band matrix. * * Arguments * ========= * * order (input) enum blas_order_type * Storage format of input hermitian matrix A. * * uplo (input) enum blas_uplo_type * Determines which half of matrix A (upper or lower triangle) * is accessed. * * n (input) int * Dimension of A and size of vectors x, y. * * k (input) int * Number of subdiagonals ( = number of superdiagonals) * * alpha (input) const void* * * a (input) void* * Matrix A. * * lda (input) int * Leading dimension of matrix A. * * x (input) void* * Vector x. * * incx (input) int * Stride for vector x. * * beta (input) const void* * * y (input/output) void* * Vector y. * * incy (input) int * Stride for vector y. * * * Notes on storing a hermitian band matrix: * * Integers in the below arrays represent values of * type complex float. * * if we have a hermitian matrix: * * 1d 2 3 0 0 * 2# 4d 5 6 0 * 3# 5# 7d 8 9 * 0 6# 8# 10d 11 * 0 0 9# 11# 12d * * This matrix has n == 5, and k == 2. It can be stored in the * following ways: * * Notes for the examples: * Each column below represents a contigous vector. * Columns are strided by lda. * An asterisk (*) represents a position in the * matrix that is not used. * A pound sign (#) represents the conjugated form is stored * A d following an integer indicates that the imaginary * part of the number is assumed to be zero. * Note that the minimum lda (size of column) is 3 (k+1). * lda may be arbitrarily large; an lda > 3 would mean * there would be unused data at the bottom of the below * columns. * * blas_colmajor and blas_upper: * * * 3 6 9 * * 2 5 8 11 * 1d 4d 7d 10d 12d * * * blas_colmajor and blas_lower * 1d 4d 7d 10d 12d * 2# 5# 8# 11# * * 3# 6# 9# * * * * * blas_rowmajor and blas_upper * Columns here also represent contigous arrays. * 1d 4d 7d 10d 12d * 2 5 8 11 * * 3 6 9 * * * * * blas_rowmajor and blas_lower * Columns here also represent contigous arrays. * * * 3# 6# 9# * * 2# 5# 8# 11# * 1d 4d 7d 10d 12d * */ { /* Routine name */ static const char routine_name[] = "BLAS_zhbmv_c_z"; /* Integer Index Variables */ int i, j; int xi, yi; int aij, astarti, x_starti, y_starti; int incaij, incaij2; int n_i; int maxj_first, maxj_second; /* Input Matrices */ const float *a_i = (float *) a; const double *x_i = (double *) x; /* Output Vector */ double *y_i = (double *) y; /* Input Scalars */ double *alpha_i = (double *) alpha; double *beta_i = (double *) beta; /* Temporary Floating-Point Variables */ float a_elem[2]; double x_elem[2]; double y_elem[2]; double prod[2]; double sum[2]; double tmp1[2]; double tmp2[2]; /* Test for no-op */ if (n <= 0) { return; } if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) { return; } /* Check for error conditions. */ if (order != blas_colmajor && order != blas_rowmajor) { BLAS_error(routine_name, -1, order, 0); } if (uplo != blas_upper && uplo != blas_lower) { BLAS_error(routine_name, -2, uplo, 0); } if (n < 0) { BLAS_error(routine_name, -3, n, 0); } if (k < 0 || k > n) { BLAS_error(routine_name, -4, k, 0); } if ((lda < k + 1) || (lda < 1)) { BLAS_error(routine_name, -7, lda, 0); } if (incx == 0) { BLAS_error(routine_name, -9, incx, 0); } if (incy == 0) { BLAS_error(routine_name, -12, incy, 0); } /* Set Index Parameters */ n_i = n; if (((uplo == blas_upper) && (order == blas_colmajor)) || ((uplo == blas_lower) && (order == blas_rowmajor))) { incaij = 1; /* increment in first loop */ incaij2 = lda - 1; /* increment in second loop */ astarti = k; /* does not start on zero element */ } else { incaij = lda - 1; incaij2 = 1; astarti = 0; /* start on first element of array */ } /* Adjustment to increments (if any) */ incx *= 2; incy *= 2; astarti *= 2; incaij *= 2; incaij2 *= 2; if (incx < 0) { x_starti = (-n_i + 1) * incx; } else { x_starti = 0; } if (incy < 0) { y_starti = (-n_i + 1) * incy; } else { y_starti = 0; } /* alpha = 0. In this case, just return beta * y */ if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp1[0] = (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1]; tmp1[1] = (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0]; } y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } else { /* determine the loop interation counts */ /* maj_first is number of elements done in first loop (this will increase by one over each column up to a limit) */ maxj_first = 0; /* maxj_second is number of elements done in second loop the first time */ maxj_second = MIN(k + 1, n_i); /* determine whether we conjugate in first loop or second loop */ if (uplo == blas_lower) { /* conjugate second loop */ /* Case alpha == 1. */ if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* Case alpha = 1, beta = 0. We compute y <--- A * x */ for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { sum[0] = sum[1] = 0.0; for (j = 0, aij = astarti, xi = x_starti; j < maxj_first; j++, aij += incaij, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aij]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij2; xi += incx; for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_i[yi] = sum[0]; y_i[yi + 1] = sum[1]; if (i + 1 >= (n_i - k)) { maxj_second--; } if (i >= k) { astarti += (incaij + incaij2); x_starti += incx; } else { maxj_first++; astarti += incaij2; } } } else { /* Case alpha = 1, but beta != 0. We compute y <--- A * x + beta * y */ for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { sum[0] = sum[1] = 0.0; for (j = 0, aij = astarti, xi = x_starti; j < maxj_first; j++, aij += incaij, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aij]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij2; xi += incx; for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1]; tmp2[1] = (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0]; } tmp1[0] = sum[0]; tmp1[1] = sum[1]; tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; if (i + 1 >= (n_i - k)) { maxj_second--; } if (i >= k) { astarti += (incaij + incaij2); x_starti += incx; } else { maxj_first++; astarti += incaij2; } } } } else { /* The most general form, y <--- alpha * A * x + beta * y */ for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { sum[0] = sum[1] = 0.0; for (j = 0, aij = astarti, xi = x_starti; j < maxj_first; j++, aij += incaij, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aij]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij2; xi += incx; for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1]; tmp2[1] = (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0]; } { tmp1[0] = (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1]; tmp1[1] = (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0]; } tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; if (i + 1 >= (n_i - k)) { maxj_second--; } if (i >= k) { astarti += (incaij + incaij2); x_starti += incx; } else { maxj_first++; astarti += incaij2; } } } } else { /* conjugate first loop */ /* Case alpha == 1. */ if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* Case alpha = 1, beta = 0. We compute y <--- A * x */ for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { sum[0] = sum[1] = 0.0; for (j = 0, aij = astarti, xi = x_starti; j < maxj_first; j++, aij += incaij, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aij]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij2; xi += incx; for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_i[yi] = sum[0]; y_i[yi + 1] = sum[1]; if (i + 1 >= (n_i - k)) { maxj_second--; } if (i >= k) { astarti += (incaij + incaij2); x_starti += incx; } else { maxj_first++; astarti += incaij2; } } } else { /* Case alpha = 1, but beta != 0. We compute y <--- A * x + beta * y */ for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { sum[0] = sum[1] = 0.0; for (j = 0, aij = astarti, xi = x_starti; j < maxj_first; j++, aij += incaij, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aij]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij2; xi += incx; for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1]; tmp2[1] = (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0]; } tmp1[0] = sum[0]; tmp1[1] = sum[1]; tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; if (i + 1 >= (n_i - k)) { maxj_second--; } if (i >= k) { astarti += (incaij + incaij2); x_starti += incx; } else { maxj_first++; astarti += incaij2; } } } } else { /* The most general form, y <--- alpha * A * x + beta * y */ for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { sum[0] = sum[1] = 0.0; for (j = 0, aij = astarti, xi = x_starti; j < maxj_first; j++, aij += incaij, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aij]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij2; xi += incx; for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double) a_elem[0] * x_elem[0] - (double) a_elem[1] * x_elem[1]; prod[1] = (double) a_elem[0] * x_elem[1] + (double) a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1]; tmp2[1] = (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0]; } { tmp1[0] = (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1]; tmp1[1] = (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0]; } tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; if (i + 1 >= (n_i - k)) { maxj_second--; } if (i >= k) { astarti += (incaij + incaij2); x_starti += incx; } else { maxj_first++; astarti += incaij2; } } } } } } /* end BLAS_zhbmv_c_z */
void BLAS_zsymv_d_z_x(enum blas_order_type order, enum blas_uplo_type uplo, int n, const void *alpha, const double *a, int lda, const void *x, int incx, const void *beta, void *y, int incy, enum blas_prec_type prec) /* * Purpose * ======= * * This routines computes the matrix product: * * y <- alpha * A * x + beta * y * * where A is a Symmetric matrix. * * Arguments * ========= * * order (input) enum blas_order_type * Storage format of input symmetric matrix A. * * uplo (input) enum blas_uplo_type * Determines which half of matrix A (upper or lower triangle) * is accessed. * * n (input) int * Dimension of A and size of vectors x, y. * * alpha (input) const void* * * a (input) double* * Matrix A. * * lda (input) int * Leading dimension of matrix A. * * x (input) void* * Vector x. * * incx (input) int * Stride for vector x. * * beta (input) const void* * * y (input/output) void* * Vector y. * * incy (input) int * Stride for vector y. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * */ { /* Routine name */ static const char routine_name[] = "BLAS_zsymv_d_z_x"; switch (prec) { case blas_prec_single: case blas_prec_double: case blas_prec_indigenous:{ /* Integer Index Variables */ int i, k; int xi, yi; int aik, astarti, x_starti, y_starti; int incai; int incaik, incaik2; int n_i; /* Input Matrices */ const double *a_i = a; const double *x_i = (double *) x; /* Output Vector */ double *y_i = (double *) y; /* Input Scalars */ double *alpha_i = (double *) alpha; double *beta_i = (double *) beta; /* Temporary Floating-Point Variables */ double a_elem; double x_elem[2]; double y_elem[2]; double prod[2]; double sum[2]; double tmp1[2]; double tmp2[2]; /* Test for no-op */ if (n <= 0) { return; } if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) { return; } /* Check for error conditions. */ if (lda < n) { BLAS_error(routine_name, -3, n, NULL); } if (incx == 0) { BLAS_error(routine_name, -8, incx, NULL); } if (incy == 0) { BLAS_error(routine_name, -11, incy, NULL); } /* Set Index Parameters */ n_i = n; if ((order == blas_colmajor && uplo == blas_upper) || (order == blas_rowmajor && uplo == blas_lower)) { incai = lda; incaik = 1; incaik2 = lda; } else { incai = 1; incaik = lda; incaik2 = 1; } /* Adjustment to increments (if any) */ incx *= 2; incy *= 2; if (incx < 0) { x_starti = (-n + 1) * incx; } else { x_starti = 0; } if (incy < 0) { y_starti = (-n + 1) * incy; } else { y_starti = 0; } /* alpha = 0. In this case, just return beta * y */ if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp1[0] = (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1]; tmp1[1] = (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0]; } y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { /* Case alpha == 1. */ if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* Case alpha = 1, beta = 0. We compute y <--- A * x */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_i[yi] = sum[0]; y_i[yi + 1] = sum[1]; } } else { /* Case alpha = 1, but beta != 0. We compute y <--- A * x + beta * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1]; tmp2[1] = (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0]; } tmp1[0] = sum[0]; tmp1[1] = sum[1]; tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } } else { /* The most general form, y <--- alpha * A * x + beta * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem; prod[1] = x_elem[1] * a_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1]; tmp2[1] = (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0]; } { tmp1[0] = (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1]; tmp1[1] = (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0]; } tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } break; } case blas_prec_extra:{ /* Integer Index Variables */ int i, k; int xi, yi; int aik, astarti, x_starti, y_starti; int incai; int incaik, incaik2; int n_i; /* Input Matrices */ const double *a_i = a; const double *x_i = (double *) x; /* Output Vector */ double *y_i = (double *) y; /* Input Scalars */ double *alpha_i = (double *) alpha; double *beta_i = (double *) beta; /* Temporary Floating-Point Variables */ double a_elem; double x_elem[2]; double y_elem[2]; double head_prod[2], tail_prod[2]; double head_sum[2], tail_sum[2]; double head_tmp1[2], tail_tmp1[2]; double head_tmp2[2], tail_tmp2[2]; FPU_FIX_DECL; /* Test for no-op */ if (n <= 0) { return; } if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) { return; } /* Check for error conditions. */ if (lda < n) { BLAS_error(routine_name, -3, n, NULL); } if (incx == 0) { BLAS_error(routine_name, -8, incx, NULL); } if (incy == 0) { BLAS_error(routine_name, -11, incy, NULL); } /* Set Index Parameters */ n_i = n; if ((order == blas_colmajor && uplo == blas_upper) || (order == blas_rowmajor && uplo == blas_lower)) { incai = lda; incaik = 1; incaik2 = lda; } else { incai = 1; incaik = lda; incaik2 = 1; } /* Adjustment to increments (if any) */ incx *= 2; incy *= 2; if (incx < 0) { x_starti = (-n + 1) * incx; } else { x_starti = 0; } if (incy < 0) { y_starti = (-n + 1) * incy; } else { y_starti = 0; } FPU_FIX_START; /* alpha = 0. In this case, just return beta * y */ if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { /* Compute complex-extra = complex-double * complex-double. */ double head_t1, tail_t1; double head_t2, tail_t2; /* Real part */ { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[0] * split; a1 = con - y_elem[0]; a1 = con - a1; a2 = y_elem[0] - a1; con = beta_i[0] * split; b1 = con - beta_i[0]; b1 = con - b1; b2 = beta_i[0] - b1; head_t1 = y_elem[0] * beta_i[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[1] * split; a1 = con - y_elem[1]; a1 = con - a1; a2 = y_elem[1] - a1; con = beta_i[1] * split; b1 = con - beta_i[1]; b1 = con - b1; b2 = beta_i[1] - b1; head_t2 = y_elem[1] * beta_i[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp1[0] = head_t1; tail_tmp1[0] = tail_t1; /* Imaginary part */ { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[1] * split; a1 = con - y_elem[1]; a1 = con - a1; a2 = y_elem[1] - a1; con = beta_i[0] * split; b1 = con - beta_i[0]; b1 = con - b1; b2 = beta_i[0] - b1; head_t1 = y_elem[1] * beta_i[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[0] * split; a1 = con - y_elem[0]; a1 = con - a1; a2 = y_elem[0] - a1; con = beta_i[1] * split; b1 = con - beta_i[1]; b1 = con - b1; b2 = beta_i[1] - b1; head_t2 = y_elem[0] * beta_i[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp1[1] = head_t1; tail_tmp1[1] = tail_t1; } y_i[yi] = head_tmp1[0]; y_i[yi + 1] = head_tmp1[1]; } } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { /* Case alpha == 1. */ if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* Case alpha = 1, beta = 0. We compute y <--- A * x */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { /* Compute complex-extra = complex-double * real. */ double head_t, tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[0] * split; b1 = con - x_elem[0]; b1 = con - b1; b2 = x_elem[0] - b1; head_t = a_elem * x_elem[0]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[0] = head_t; tail_prod[0] = tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[1] * split; b1 = con - x_elem[1]; b1 = con - b1; b2 = x_elem[1] - b1; head_t = a_elem * x_elem[1]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[1] = head_t; tail_prod[1] = tail_t; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum[0]; tail_a = tail_sum[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; /* Imaginary part */ head_a = head_sum[1]; tail_a = tail_sum[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { /* Compute complex-extra = complex-double * real. */ double head_t, tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[0] * split; b1 = con - x_elem[0]; b1 = con - b1; b2 = x_elem[0] - b1; head_t = a_elem * x_elem[0]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[0] = head_t; tail_prod[0] = tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[1] * split; b1 = con - x_elem[1]; b1 = con - b1; b2 = x_elem[1] - b1; head_t = a_elem * x_elem[1]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[1] = head_t; tail_prod[1] = tail_t; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum[0]; tail_a = tail_sum[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; /* Imaginary part */ head_a = head_sum[1]; tail_a = tail_sum[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } } y_i[yi] = head_sum[0]; y_i[yi + 1] = head_sum[1]; } } else { /* Case alpha = 1, but beta != 0. We compute y <--- A * x + beta * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { /* Compute complex-extra = complex-double * real. */ double head_t, tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[0] * split; b1 = con - x_elem[0]; b1 = con - b1; b2 = x_elem[0] - b1; head_t = a_elem * x_elem[0]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[0] = head_t; tail_prod[0] = tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[1] * split; b1 = con - x_elem[1]; b1 = con - b1; b2 = x_elem[1] - b1; head_t = a_elem * x_elem[1]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[1] = head_t; tail_prod[1] = tail_t; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum[0]; tail_a = tail_sum[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; /* Imaginary part */ head_a = head_sum[1]; tail_a = tail_sum[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { /* Compute complex-extra = complex-double * real. */ double head_t, tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[0] * split; b1 = con - x_elem[0]; b1 = con - b1; b2 = x_elem[0] - b1; head_t = a_elem * x_elem[0]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[0] = head_t; tail_prod[0] = tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[1] * split; b1 = con - x_elem[1]; b1 = con - b1; b2 = x_elem[1] - b1; head_t = a_elem * x_elem[1]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[1] = head_t; tail_prod[1] = tail_t; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum[0]; tail_a = tail_sum[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; /* Imaginary part */ head_a = head_sum[1]; tail_a = tail_sum[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { /* Compute complex-extra = complex-double * complex-double. */ double head_t1, tail_t1; double head_t2, tail_t2; /* Real part */ { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[0] * split; a1 = con - y_elem[0]; a1 = con - a1; a2 = y_elem[0] - a1; con = beta_i[0] * split; b1 = con - beta_i[0]; b1 = con - b1; b2 = beta_i[0] - b1; head_t1 = y_elem[0] * beta_i[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[1] * split; a1 = con - y_elem[1]; a1 = con - a1; a2 = y_elem[1] - a1; con = beta_i[1] * split; b1 = con - beta_i[1]; b1 = con - b1; b2 = beta_i[1] - b1; head_t2 = y_elem[1] * beta_i[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp2[0] = head_t1; tail_tmp2[0] = tail_t1; /* Imaginary part */ { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[1] * split; a1 = con - y_elem[1]; a1 = con - a1; a2 = y_elem[1] - a1; con = beta_i[0] * split; b1 = con - beta_i[0]; b1 = con - b1; b2 = beta_i[0] - b1; head_t1 = y_elem[1] * beta_i[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[0] * split; a1 = con - y_elem[0]; a1 = con - a1; a2 = y_elem[0] - a1; con = beta_i[1] * split; b1 = con - beta_i[1]; b1 = con - b1; b2 = beta_i[1] - b1; head_t2 = y_elem[0] * beta_i[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp2[1] = head_t1; tail_tmp2[1] = tail_t1; } head_tmp1[0] = head_sum[0]; tail_tmp1[0] = tail_sum[0]; head_tmp1[1] = head_sum[1]; tail_tmp1[1] = tail_sum[1]; { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_tmp2[0]; tail_a = tail_tmp2[0]; head_b = head_tmp1[0]; tail_b = tail_tmp1[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmp1[0] = head_t; tail_tmp1[0] = tail_t; /* Imaginary part */ head_a = head_tmp2[1]; tail_a = tail_tmp2[1]; head_b = head_tmp1[1]; tail_b = tail_tmp1[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmp1[1] = head_t; tail_tmp1[1] = tail_t; } y_i[yi] = head_tmp1[0]; y_i[yi + 1] = head_tmp1[1]; } } } else { /* The most general form, y <--- alpha * A * x + beta * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { /* Compute complex-extra = complex-double * real. */ double head_t, tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[0] * split; b1 = con - x_elem[0]; b1 = con - b1; b2 = x_elem[0] - b1; head_t = a_elem * x_elem[0]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[0] = head_t; tail_prod[0] = tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[1] * split; b1 = con - x_elem[1]; b1 = con - b1; b2 = x_elem[1] - b1; head_t = a_elem * x_elem[1]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[1] = head_t; tail_prod[1] = tail_t; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum[0]; tail_a = tail_sum[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; /* Imaginary part */ head_a = head_sum[1]; tail_a = tail_sum[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { /* Compute complex-extra = complex-double * real. */ double head_t, tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[0] * split; b1 = con - x_elem[0]; b1 = con - b1; b2 = x_elem[0] - b1; head_t = a_elem * x_elem[0]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[0] = head_t; tail_prod[0] = tail_t; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = x_elem[1] * split; b1 = con - x_elem[1]; b1 = con - b1; b2 = x_elem[1] - b1; head_t = a_elem * x_elem[1]; tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2; } head_prod[1] = head_t; tail_prod[1] = tail_t; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum[0]; tail_a = tail_sum[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; /* Imaginary part */ head_a = head_sum[1]; tail_a = tail_sum[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { /* Compute complex-extra = complex-double * complex-double. */ double head_t1, tail_t1; double head_t2, tail_t2; /* Real part */ { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[0] * split; a1 = con - y_elem[0]; a1 = con - a1; a2 = y_elem[0] - a1; con = beta_i[0] * split; b1 = con - beta_i[0]; b1 = con - b1; b2 = beta_i[0] - b1; head_t1 = y_elem[0] * beta_i[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[1] * split; a1 = con - y_elem[1]; a1 = con - a1; a2 = y_elem[1] - a1; con = beta_i[1] * split; b1 = con - beta_i[1]; b1 = con - b1; b2 = beta_i[1] - b1; head_t2 = y_elem[1] * beta_i[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp2[0] = head_t1; tail_tmp2[0] = tail_t1; /* Imaginary part */ { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[1] * split; a1 = con - y_elem[1]; a1 = con - a1; a2 = y_elem[1] - a1; con = beta_i[0] * split; b1 = con - beta_i[0]; b1 = con - b1; b2 = beta_i[0] - b1; head_t1 = y_elem[1] * beta_i[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem[0] * split; a1 = con - y_elem[0]; a1 = con - a1; a2 = y_elem[0] - a1; con = beta_i[1] * split; b1 = con - beta_i[1]; b1 = con - b1; b2 = beta_i[1] - b1; head_t2 = y_elem[0] * beta_i[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp2[1] = head_t1; tail_tmp2[1] = tail_t1; } { /* Compute complex-extra = complex-extra * complex-double. */ double head_a0, tail_a0; double head_a1, tail_a1; double head_t1, tail_t1; double head_t2, tail_t2; head_a0 = head_sum[0]; tail_a0 = tail_sum[0]; head_a1 = head_sum[1]; tail_a1 = tail_sum[1]; /* real part */ { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = alpha_i[0] * split; b1 = con - alpha_i[0]; b1 = con - b1; b2 = alpha_i[0] - b1; c11 = head_a0 * alpha_i[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * alpha_i[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = alpha_i[1] * split; b1 = con - alpha_i[1]; b1 = con - b1; b2 = alpha_i[1] - b1; c11 = head_a1 * alpha_i[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * alpha_i[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp1[0] = head_t1; tail_tmp1[0] = tail_t1; /* imaginary part */ { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = alpha_i[0] * split; b1 = con - alpha_i[0]; b1 = con - b1; b2 = alpha_i[0] - b1; c11 = head_a1 * alpha_i[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * alpha_i[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = alpha_i[1] * split; b1 = con - alpha_i[1]; b1 = con - b1; b2 = alpha_i[1] - b1; c11 = head_a0 * alpha_i[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * alpha_i[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp1[1] = head_t1; tail_tmp1[1] = tail_t1; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_tmp2[0]; tail_a = tail_tmp2[0]; head_b = head_tmp1[0]; tail_b = tail_tmp1[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmp1[0] = head_t; tail_tmp1[0] = tail_t; /* Imaginary part */ head_a = head_tmp2[1]; tail_a = tail_tmp2[1]; head_b = head_tmp1[1]; tail_b = tail_tmp1[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmp1[1] = head_t; tail_tmp1[1] = tail_t; } y_i[yi] = head_tmp1[0]; y_i[yi + 1] = head_tmp1[1]; } } FPU_FIX_STOP; break; } } } /* end BLAS_zsymv_d_z_x */
void BLAS_dsymv_s_s(enum blas_order_type order, enum blas_uplo_type uplo, int n , double alpha, const float *a, int lda, const float *x, int incx, double beta, double *y, int incy) /* * Purpose * ======= * * This routines computes the matrix product: * * y <- alpha * A * x + beta * y * * where A is a Symmetric matrix. * * Arguments * ========= * * order (input) enum blas_order_type * Storage format of input symmetric matrix A. * * uplo (input) enum blas_uplo_type * Determines which half of matrix A (upper or lower triangle) * is accessed. * * n (input) int * Dimension of A and size of vectors x, y. * * alpha (input) double * * a (input) float* * Matrix A. * * lda (input) int * Leading dimension of matrix A. * * x (input) float* * Vector x. * * incx (input) int * Stride for vector x. * * beta (input) double * * y (input/output) double* * Vector y. * * incy (input) int * Stride for vector y. * */ { /* Routine name */ static const char routine_name[] = "BLAS_dsymv_s_s"; /* Integer Index Variables */ int i , k; int xi , yi; int aik , astarti, x_starti, y_starti; int incai; int incaik , incaik2; int n_i; /* Input Matrices */ const float *a_i = a; const float *x_i = x; /* Output Vector */ double *y_i = y; /* Input Scalars */ double alpha_i = alpha; double beta_i = beta; /* Temporary Floating-Point Variables */ float a_elem; float x_elem; double y_elem; double prod; double sum; double tmp1; double tmp2; /* Test for no-op */ if (n <= 0) { return; } if (alpha_i == 0.0 && beta_i == 1.0) { return; } /* Check for error conditions. */ if (lda < n) { BLAS_error(routine_name, -3, n, NULL); } if (incx == 0) { BLAS_error(routine_name, -8, incx, NULL); } if (incy == 0) { BLAS_error(routine_name, -11, incy, NULL); } /* Set Index Parameters */ n_i = n; if ((order == blas_colmajor && uplo == blas_upper) || (order == blas_rowmajor && uplo == blas_lower)) { incai = lda; incaik = 1; incaik2 = lda; } else { incai = 1; incaik = lda; incaik2 = 1; } /* Adjustment to increments (if any) */ if (incx < 0) { x_starti = (-n + 1) * incx; } else { x_starti = 0; } if (incy < 0) { y_starti = (-n + 1) * incy; } else { y_starti = 0; } /* alpha = 0. In this case, just return beta * y */ if (alpha_i == 0.0) { for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { y_elem = y_i[yi]; tmp1 = y_elem * beta_i; y_i[yi] = tmp1; } } else if (alpha_i == 1.0) { /* Case alpha == 1. */ if (beta_i == 0.0) { /* Case alpha = 1, beta = 0. We compute y <--- A * x */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { sum = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem = x_i[xi]; prod = (double)a_elem *x_elem; sum = sum + prod; } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem = x_i[xi]; prod = (double)a_elem *x_elem; sum = sum + prod; } y_i[yi] = sum; } } else { /* * Case alpha = 1, but beta != 0. We compute y <--- A * x + beta * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { sum = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem = x_i[xi]; prod = (double)a_elem *x_elem; sum = sum + prod; } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem = x_i[xi]; prod = (double)a_elem *x_elem; sum = sum + prod; } y_elem = y_i[yi]; tmp2 = y_elem * beta_i; tmp1 = sum; tmp1 = tmp2 + tmp1; y_i[yi] = tmp1; } } } else { /* The most general form, y <--- alpha * A * x + beta * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incai) { sum = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem = a_i[aik]; x_elem = x_i[xi]; prod = (double)a_elem *x_elem; sum = sum + prod; } for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem = a_i[aik]; x_elem = x_i[xi]; prod = (double)a_elem *x_elem; sum = sum + prod; } y_elem = y_i[yi]; tmp2 = y_elem * beta_i; tmp1 = sum * alpha_i; tmp1 = tmp2 + tmp1; y_i[yi] = tmp1; } } } /* end BLAS_dsymv_s_s */
void BLAS_dwaxpby_s_d(int n, double alpha, const float *x, int incx, double beta , const double *y, int incy, double *w, int incw) /* * Purpose * ======= * * This routine computes: * * w <- alpha * x + beta * y * * Arguments * ========= * * n (input) int * The length of vectors x, y, and w. * * alpha (input) double * * x (input) const float* * Array of length n. * * incx (input) int * The stride used to access components x[i]. * * beta (input) double * * y (input) double* * Array of length n. * * incy (input) int * The stride used to access components y[i]. * * w (output) double* * Array of length n. * * incw (input) int * The stride used to write components w[i]. * */ { char *routine_name = "BLAS_dwaxpby_s_d"; int i , ix = 0, iy = 0, iw = 0; double *w_i = w; const float *x_i = x; const double *y_i = y; double alpha_i = alpha; double beta_i = beta; float x_ii; double y_ii; double tmpx; double tmpy; /* Test the input parameters. */ if (incx == 0) BLAS_error(routine_name, -4, incx, NULL); else if (incy == 0) BLAS_error(routine_name, -7, incy, NULL); else if (incw == 0) BLAS_error(routine_name, -9, incw, NULL); /* Immediate return */ if (n <= 0) { return; } if (incx < 0) ix = (-n + 1) * incx; if (incy < 0) iy = (-n + 1) * incy; if (incw < 0) iw = (-n + 1) * incw; for (i = 0; i < n; ++i) { x_ii = x_i[ix]; y_ii = y_i[iy]; tmpx = alpha_i * x_ii; /* tmpx = alpha * x[ix] */ tmpy = beta_i * y_ii; /* tmpy = beta * y[iy] */ tmpy = tmpy + tmpx; w_i[iw] = tmpy; ix += incx; iy += incy; iw += incw; } /* endfor */ }
void BLAS_zaxpby_c_x(int n, const void *alpha, const void *x, int incx, const void *beta, void *y, int incy , enum blas_prec_type prec) /* * Purpose * ======= * * This routine computes: * * y <- alpha * x + beta * y. * * Arguments * ========= * * n (input) int * The length of vectors x and y. * * alpha (input) const void* * * x (input) const void* * Array of length n. * * incx (input) int * The stride used to access components x[i]. * * beta (input) const void* * * y (input) void* * Array of length n. * * incy (input) int * The stride used to access components y[i]. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * */ { static const char routine_name[] = "BLAS_zaxpby_c_x"; switch (prec) { case blas_prec_single: case blas_prec_double: case blas_prec_indigenous: { int i , ix = 0, iy = 0; const float *x_i = (float *)x; double *y_i = (double *)y; double *alpha_i = (double *)alpha; double *beta_i = (double *)beta; float x_ii[2]; double y_ii[2]; double tmpx[2]; double tmpy[2]; /* Test the input parameters. */ if (incx == 0) BLAS_error(routine_name, -4, incx, NULL); else if (incy == 0) BLAS_error(routine_name, -7, incy, NULL); /* Immediate return */ if (n <= 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0))) return; incx *= 2; incy *= 2; if (incx < 0) ix = (-n + 1) * incx; if (incy < 0) iy = (-n + 1) * incy; for (i = 0; i < n; ++i) { x_ii[0] = x_i[ix]; x_ii[1] = x_i[ix + 1]; y_ii[0] = y_i[iy]; y_ii[1] = y_i[iy + 1]; { tmpx[0] = (double)alpha_i[0] * x_ii[0] - (double)alpha_i[1] * x_ii[1]; tmpx[1] = (double)alpha_i[0] * x_ii[1] + (double)alpha_i[1] * x_ii[0]; } /* tmpx = alpha * x[ix] */ { tmpy[0] = (double)beta_i[0] * y_ii[0] - (double)beta_i[1] * y_ii[1]; tmpy[1] = (double)beta_i[0] * y_ii[1] + (double)beta_i[1] * y_ii[0]; } /* tmpy = beta * y[iy] */ tmpy[0] = tmpy[0] + tmpx[0]; tmpy[1] = tmpy[1] + tmpx[1]; y_i[iy] = tmpy[0]; y_i[iy + 1] = tmpy[1]; ix += incx; iy += incy; } /* endfor */ } break; case blas_prec_extra: { int i , ix = 0, iy = 0; const float *x_i = (float *)x; double *y_i = (double *)y; double *alpha_i = (double *)alpha; double *beta_i = (double *)beta; float x_ii[2]; double y_ii[2]; double head_tmpx[2], tail_tmpx[2]; double head_tmpy[2], tail_tmpy[2]; FPU_FIX_DECL; /* Test the input parameters. */ if (incx == 0) BLAS_error(routine_name, -4, incx, NULL); else if (incy == 0) BLAS_error(routine_name, -7, incy, NULL); /* Immediate return */ if (n <= 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0))) return; FPU_FIX_START; incx *= 2; incy *= 2; if (incx < 0) ix = (-n + 1) * incx; if (incy < 0) iy = (-n + 1) * incy; for (i = 0; i < n; ++i) { x_ii[0] = x_i[ix]; x_ii[1] = x_i[ix + 1]; y_ii[0] = y_i[iy]; y_ii[1] = y_i[iy + 1]; { double cd [2]; cd[0] = (double)x_ii[0]; cd[1] = (double)x_ii[1]; { /* Compute complex-extra = complex-double * complex-double. */ double head_t1, tail_t1; double head_t2, tail_t2; /* Real part */ { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = alpha_i[0] * split; a1 = con - alpha_i[0]; a1 = con - a1; a2 = alpha_i[0] - a1; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; head_t1 = alpha_i[0] * cd[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = alpha_i[1] * split; a1 = con - alpha_i[1]; a1 = con - a1; a2 = alpha_i[1] - a1; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; head_t2 = alpha_i[1] * cd[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmpx[0] = head_t1; tail_tmpx[0] = tail_t1; /* Imaginary part */ { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = alpha_i[1] * split; a1 = con - alpha_i[1]; a1 = con - a1; a2 = alpha_i[1] - a1; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; head_t1 = alpha_i[1] * cd[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = alpha_i[0] * split; a1 = con - alpha_i[0]; a1 = con - a1; a2 = alpha_i[0] - a1; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; head_t2 = alpha_i[0] * cd[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmpx[1] = head_t1; tail_tmpx[1] = tail_t1; } } /* tmpx = alpha * x[ix] */ { /* Compute complex-extra = complex-double * complex-double. */ double head_t1, tail_t1; double head_t2, tail_t2; /* Real part */ { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = beta_i[0] * split; a1 = con - beta_i[0]; a1 = con - a1; a2 = beta_i[0] - a1; con = y_ii[0] * split; b1 = con - y_ii[0]; b1 = con - b1; b2 = y_ii[0] - b1; head_t1 = beta_i[0] * y_ii[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = beta_i[1] * split; a1 = con - beta_i[1]; a1 = con - a1; a2 = beta_i[1] - a1; con = y_ii[1] * split; b1 = con - y_ii[1]; b1 = con - b1; b2 = y_ii[1] - b1; head_t2 = beta_i[1] * y_ii[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmpy[0] = head_t1; tail_tmpy[0] = tail_t1; /* Imaginary part */ { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = beta_i[1] * split; a1 = con - beta_i[1]; a1 = con - a1; a2 = beta_i[1] - a1; con = y_ii[0] * split; b1 = con - y_ii[0]; b1 = con - b1; b2 = y_ii[0] - b1; head_t1 = beta_i[1] * y_ii[0]; tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = beta_i[0] * split; a1 = con - beta_i[0]; a1 = con - a1; a2 = beta_i[0] - a1; con = y_ii[1] * split; b1 = con - y_ii[1]; b1 = con - b1; b2 = y_ii[1] - b1; head_t2 = beta_i[0] * y_ii[1]; tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmpy[1] = head_t1; tail_tmpy[1] = tail_t1; } /* tmpy = beta * y[iy] */ { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_tmpy[0]; tail_a = tail_tmpy[0]; head_b = head_tmpx[0]; tail_b = tail_tmpx[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmpy[0] = head_t; tail_tmpy[0] = tail_t; /* Imaginary part */ head_a = head_tmpy[1]; tail_a = tail_tmpy[1]; head_b = head_tmpx[1]; tail_b = tail_tmpx[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmpy[1] = head_t; tail_tmpy[1] = tail_t; } y_i[iy] = head_tmpy[0]; y_i[iy + 1] = head_tmpy[1]; ix += incx; iy += incy; } /* endfor */ FPU_FIX_STOP; } break; } } /* end BLAS_zaxpby_c_x */
void BLAS_cgbmv_c_s(enum blas_order_type order, enum blas_trans_type trans, int m, int n, int kl, int ku, const void *alpha, const void *a, int lda, const float *x, int incx, const void *beta, void *y, int incy) /* * Purpose * ======= * * gbmv computes y = alpha * A * x + beta * y, where * * A is a m x n banded matrix * x is a n x 1 vector * y is a m x 1 vector * alpha and beta are scalars * * * Arguments * ========= * * order (input) blas_order_type * Order of AP; row or column major * * trans (input) blas_trans_type * Transpose of AB; no trans, * trans, or conjugate trans * * m (input) int * Dimension of AB * * n (input) int * Dimension of AB and the length of vector x * * kl (input) int * Number of lower diagnols of AB * * ku (input) int * Number of upper diagnols of AB * * alpha (input) const void* * * AB (input) void* * * lda (input) int * Leading dimension of AB * lda >= ku + kl + 1 * * x (input) float* * * incx (input) int * The stride for vector x. * * beta (input) const void* * * y (input/output) void* * * incy (input) int * The stride for vector y. * * * LOCAL VARIABLES * =============== * * As an example, these variables are described on the mxn, column * major, banded matrix described in section 2.2.3 of the specification * * astart indexes first element in A where computation begins * * incai1 indexes first element in row where row is less than lbound * * incai2 indexes first element in row where row exceeds lbound * * lbound denotes the number of rows before first element shifts * * rbound denotes the columns where there is blank space * * ra index of the rightmost element for a given row * * la index of leftmost elements for a given row * * ra - la width of a row * * rbound * la ra ____|_____ * | | | | * | a00 a01 * * * * lbound -| a10 a11 a12 * * * | a20 a21 a22 a23 * * * a31 a32 a33 a34 * * * a42 a43 a44 * * Varations on order and transpose have been implemented by modifying these * local variables. * */ { static const char routine_name[] = "BLAS_cgbmv_c_s"; int ky, iy, kx, jx, j, i, rbound, lbound, ra, la, lenx, leny; int incaij, aij, incai1, incai2, astart, ai; float *y_i = (float *) y; const float *a_i = (float *) a; const float *x_i = x; float *alpha_i = (float *) alpha; float *beta_i = (float *) beta; float tmp1[2]; float tmp2[2]; float result[2]; float sum[2]; float prod[2]; float a_elem[2]; float x_elem; float y_elem[2]; if (order != blas_colmajor && order != blas_rowmajor) BLAS_error(routine_name, -1, order, NULL); if (trans != blas_no_trans && trans != blas_trans && trans != blas_conj_trans) { BLAS_error(routine_name, -2, trans, NULL); } if (m < 0) BLAS_error(routine_name, -3, m, NULL); if (n < 0) BLAS_error(routine_name, -4, n, NULL); if (kl < 0 || kl >= m) BLAS_error(routine_name, -5, kl, NULL); if (ku < 0 || ku >= n) BLAS_error(routine_name, -6, ku, NULL); if (lda < kl + ku + 1) BLAS_error(routine_name, -9, lda, NULL); if (incx == 0) BLAS_error(routine_name, -11, incx, NULL); if (incy == 0) BLAS_error(routine_name, -14, incy, NULL); if ((m == 0) || (n == 0) || (((alpha_i[0] == 0.0 && alpha_i[1] == 0.0) && ((beta_i[0] == 1.0 && beta_i[1] == 0.0))))) return; if (trans == blas_no_trans) { lenx = n; leny = m; } else { /* change back */ lenx = m; leny = n; } if (incx < 0) { kx = -(lenx - 1) * incx; } else { kx = 0; } if (incy < 0) { ky = -(leny - 1) * incy; } else { ky = 0; } /* if alpha = 0, return y = y*beta */ if ((order == blas_colmajor) && (trans == blas_no_trans)) { astart = ku; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else if ((order == blas_colmajor) && (trans != blas_no_trans)) { astart = ku; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = ku; rbound = m - kl - 1; ra = kl; } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) { astart = kl; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else { /* rowmajor and blas_trans */ astart = kl; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = ku; rbound = m - kl - 1; ra = kl; } incy *= 2; incaij *= 2; incai1 *= 2; incai2 *= 2; astart *= 2; ky *= 2; la = 0; ai = astart; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0; aij = ai; jx = kx; if (trans != blas_conj_trans) { for (j = ra - la; j >= 0; j--) { x_elem = x_i[jx]; a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; { prod[0] = a_elem[0] * x_elem; prod[1] = a_elem[1] * x_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij; jx += incx; } } else { for (j = ra - la; j >= 0; j--) { x_elem = x_i[jx]; a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; { prod[0] = a_elem[0] * x_elem; prod[1] = a_elem[1] * x_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij; jx += incx; } } { tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1]; tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0]; } y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp2[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1]; tmp2[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0]; } result[0] = tmp1[0] + tmp2[0]; result[1] = tmp1[1] + tmp2[1]; y_i[iy] = result[0]; y_i[iy + 1] = result[1]; iy += incy; if (i >= lbound) { kx += incx; ai += incai2; la++; } else { ai += incai1; } if (i < rbound) { ra++; } } } /* end GEMV_NAME(c, c, s, ) */
void BLAS_ctrmv_s_x(enum blas_order_type order, enum blas_uplo_type uplo, enum blas_trans_type trans, enum blas_diag_type diag, int n, const void *alpha, const float *T, int ldt, void *x, int incx, enum blas_prec_type prec) /* * Purpose * ======= * * Computes x <-- alpha * T * x, where T is a triangular matrix. * * Arguments * ========= * * order (input) enum blas_order_type * column major, row major * * uplo (input) enum blas_uplo_type * upper, lower * * trans (input) enum blas_trans_type * no trans, trans, conj trans * * diag (input) enum blas_diag_type * unit, non unit * * n (input) int * the dimension of T * * alpha (input) const void* * * T (input) float* * Triangular matrix * * ldt (input) int * Leading dimension of T * * x (input) const void* * Array of length n. * * incx (input) int * The stride used to access components x[i]. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * */ { static const char routine_name[] = "BLAS_ctrmv_s_x"; switch (prec) { case blas_prec_single:{ int i , j; /* used to idx matrix */ int xj , xj0; int ti , tij, tij0; int inc_ti, inc_tij; int inc_x; const float *T_i = T; /* internal matrix T */ float *x_i = (float *)x; /* internal x */ float *alpha_i = (float *)alpha; /* internal alpha */ float t_elem; float x_elem[2]; float prod[2]; float sum [2]; float tmp [2]; /* all error calls */ if ((order != blas_rowmajor && order != blas_colmajor) || (uplo != blas_upper && uplo != blas_lower) || (trans != blas_trans && trans != blas_no_trans && trans != blas_conj_trans) || (diag != blas_non_unit_diag && diag != blas_unit_diag) || (ldt < n) || (incx == 0)) { BLAS_error(routine_name, 0, 0, NULL); } else if (n <= 0) { BLAS_error(routine_name, -4, n, NULL); } else if (incx == 0) { BLAS_error(routine_name, -9, incx, NULL); } if (trans == blas_no_trans) { if (uplo == blas_upper) { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = ldt; inc_tij = -1; } else { inc_ti = 1; inc_tij = -ldt; } } else { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -ldt; inc_tij = 1; } else { inc_ti = -1; inc_tij = ldt; } } } else { if (uplo == blas_upper) { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -1; inc_tij = ldt; } else { inc_ti = -ldt; inc_tij = 1; } } else { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = 1; inc_tij = -ldt; } else { inc_ti = ldt; inc_tij = -1; } } } inc_x *= 2; xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x); if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { xj = xj0; for (j = 0; j < n; j++) { x_i[xj] = 0.0; x_i[xj + 1] = 0.0; xj += inc_x; } } else { if (diag == blas_unit_diag) { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { sum[0] = sum[1] = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < (n - 1); j++) { t_elem = T_i[tij]; x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { prod[0] = x_elem[0] * t_elem; prod[1] = x_elem[1] * t_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; xj += inc_x; tij += inc_tij; } x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; sum[0] = sum[0] + x_elem[0]; sum[1] = sum[1] + x_elem[1]; if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { x_i[xj] = sum[0]; x_i[xj + 1] = sum[1]; } else { { tmp[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1]; tmp[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0]; } x_i[xj] = tmp[0]; x_i[xj + 1] = tmp[1]; } ti += inc_ti; } } else { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { sum[0] = sum[1] = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < n; j++) { t_elem = T_i[tij]; x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { prod[0] = x_elem[0] * t_elem; prod[1] = x_elem[1] * t_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; xj += inc_x; tij += inc_tij; } if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { x_i[xj - inc_x] = sum[0]; x_i[xj - inc_x + 1] = sum[1]; } else { { tmp[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1]; tmp[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0]; } x_i[xj - inc_x] = tmp[0]; x_i[xj - inc_x + 1] = tmp[1]; } ti += inc_ti; } } } break; } case blas_prec_double: case blas_prec_indigenous:{ int i , j; /* used to idx matrix */ int xj , xj0; int ti , tij, tij0; int inc_ti, inc_tij; int inc_x; const float *T_i = T; /* internal matrix T */ float *x_i = (float *)x; /* internal x */ float *alpha_i = (float *)alpha; /* internal alpha */ float t_elem; float x_elem[2]; double prod[2]; double sum[2]; double tmp[2]; /* all error calls */ if ((order != blas_rowmajor && order != blas_colmajor) || (uplo != blas_upper && uplo != blas_lower) || (trans != blas_trans && trans != blas_no_trans && trans != blas_conj_trans) || (diag != blas_non_unit_diag && diag != blas_unit_diag) || (ldt < n) || (incx == 0)) { BLAS_error(routine_name, 0, 0, NULL); } else if (n <= 0) { BLAS_error(routine_name, -4, n, NULL); } else if (incx == 0) { BLAS_error(routine_name, -9, incx, NULL); } if (trans == blas_no_trans) { if (uplo == blas_upper) { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = ldt; inc_tij = -1; } else { inc_ti = 1; inc_tij = -ldt; } } else { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -ldt; inc_tij = 1; } else { inc_ti = -1; inc_tij = ldt; } } } else { if (uplo == blas_upper) { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -1; inc_tij = ldt; } else { inc_ti = -ldt; inc_tij = 1; } } else { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = 1; inc_tij = -ldt; } else { inc_ti = ldt; inc_tij = -1; } } } inc_x *= 2; xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x); if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { xj = xj0; for (j = 0; j < n; j++) { x_i[xj] = 0.0; x_i[xj + 1] = 0.0; xj += inc_x; } } else { if (diag == blas_unit_diag) { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { sum[0] = sum[1] = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < (n - 1); j++) { t_elem = T_i[tij]; x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { prod[0] = (double)x_elem[0] * t_elem; prod[1] = (double)x_elem[1] * t_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; xj += inc_x; tij += inc_tij; } x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; sum[0] = sum[0] + x_elem[0]; sum[1] = sum[1] + x_elem[1]; if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { x_i[xj] = sum[0]; x_i[xj + 1] = sum[1]; } else { { tmp[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1]; tmp[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0]; } x_i[xj] = tmp[0]; x_i[xj + 1] = tmp[1]; } ti += inc_ti; } } else { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { sum[0] = sum[1] = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < n; j++) { t_elem = T_i[tij]; x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { prod[0] = (double)x_elem[0] * t_elem; prod[1] = (double)x_elem[1] * t_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; xj += inc_x; tij += inc_tij; } if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { x_i[xj - inc_x] = sum[0]; x_i[xj - inc_x + 1] = sum[1]; } else { { tmp[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1]; tmp[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0]; } x_i[xj - inc_x] = tmp[0]; x_i[xj - inc_x + 1] = tmp[1]; } ti += inc_ti; } } } break; } case blas_prec_extra:{ int i , j; /* used to idx matrix */ int xj , xj0; int ti , tij, tij0; int inc_ti, inc_tij; int inc_x; const float *T_i = T; /* internal matrix T */ float *x_i = (float *)x; /* internal x */ float *alpha_i = (float *)alpha; /* internal alpha */ float t_elem; float x_elem[2]; double head_prod[2], tail_prod[2]; double head_sum[2], tail_sum[2]; double head_tmp[2], tail_tmp[2]; FPU_FIX_DECL; FPU_FIX_START; /* all error calls */ if ((order != blas_rowmajor && order != blas_colmajor) || (uplo != blas_upper && uplo != blas_lower) || (trans != blas_trans && trans != blas_no_trans && trans != blas_conj_trans) || (diag != blas_non_unit_diag && diag != blas_unit_diag) || (ldt < n) || (incx == 0)) { BLAS_error(routine_name, 0, 0, NULL); } else if (n <= 0) { BLAS_error(routine_name, -4, n, NULL); } else if (incx == 0) { BLAS_error(routine_name, -9, incx, NULL); } if (trans == blas_no_trans) { if (uplo == blas_upper) { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = ldt; inc_tij = -1; } else { inc_ti = 1; inc_tij = -ldt; } } else { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -ldt; inc_tij = 1; } else { inc_ti = -1; inc_tij = ldt; } } } else { if (uplo == blas_upper) { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -1; inc_tij = ldt; } else { inc_ti = -ldt; inc_tij = 1; } } else { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = 1; inc_tij = -ldt; } else { inc_ti = ldt; inc_tij = -1; } } } inc_x *= 2; xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x); if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { xj = xj0; for (j = 0; j < n; j++) { x_i[xj] = 0.0; x_i[xj + 1] = 0.0; xj += inc_x; } } else { if (diag == blas_unit_diag) { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < (n - 1); j++) { t_elem = T_i[tij]; x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { head_prod[0] = (double)x_elem[0] * t_elem; tail_prod[0] = 0.0; head_prod[1] = (double)x_elem[1] * t_elem; tail_prod[1] = 0.0; } { double head_t , tail_t; double head_a , tail_a; double head_b , tail_b; /* Real part */ head_a = head_sum[0]; tail_a = tail_sum[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; /* Imaginary part */ head_a = head_sum[1]; tail_a = tail_sum[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } xj += inc_x; tij += inc_tij; } x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { double cd [2]; cd[0] = (double)x_elem[0]; cd[1] = (double)x_elem[1]; { double head_t , tail_t; double head_a , tail_a; head_a = head_sum[0]; tail_a = tail_sum[0]; { /* Compute double-double = double-double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = head_a + cd[0]; e = t1 - head_a; t2 = ((cd[0] - e) + (head_a - (t1 - e))) + tail_a; /* The result is t1 + t2, after normalization. */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; head_a = head_sum[1]; tail_a = tail_sum[1]; { /* Compute double-double = double-double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = head_a + cd[1]; e = t1 - head_a; t2 = ((cd[1] - e) + (head_a - (t1 - e))) + tail_a; /* The result is t1 + t2, after normalization. */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } } if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { x_i[xj] = head_sum[0]; x_i[xj + 1] = head_sum[1]; } else { { double cd [2]; cd[0] = (double)alpha_i[0]; cd[1] = (double)alpha_i[1]; { /* Compute complex-extra = complex-extra * complex-double. */ double head_a0, tail_a0; double head_a1, tail_a1; double head_t1, tail_t1; double head_t2, tail_t2; head_a0 = head_sum[0]; tail_a0 = tail_sum[0]; head_a1 = head_sum[1]; tail_a1 = tail_sum[1]; /* real part */ { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; c11 = head_a0 * cd[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * cd[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; c11 = head_a1 * cd[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * cd[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp[0] = head_t1; tail_tmp[0] = tail_t1; /* imaginary part */ { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; c11 = head_a1 * cd[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * cd[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; c11 = head_a0 * cd[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * cd[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp[1] = head_t1; tail_tmp[1] = tail_t1; } } x_i[xj] = head_tmp[0]; x_i[xj + 1] = head_tmp[1]; } ti += inc_ti; } } else { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < n; j++) { t_elem = T_i[tij]; x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { head_prod[0] = (double)x_elem[0] * t_elem; tail_prod[0] = 0.0; head_prod[1] = (double)x_elem[1] * t_elem; tail_prod[1] = 0.0; } { double head_t , tail_t; double head_a , tail_a; double head_b , tail_b; /* Real part */ head_a = head_sum[0]; tail_a = tail_sum[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[0] = head_t; tail_sum[0] = tail_t; /* Imaginary part */ head_a = head_sum[1]; tail_a = tail_sum[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum[1] = head_t; tail_sum[1] = tail_t; } xj += inc_x; tij += inc_tij; } if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { x_i[xj - inc_x] = head_sum[0]; x_i[xj - inc_x + 1] = head_sum[1]; } else { { double cd [2]; cd[0] = (double)alpha_i[0]; cd[1] = (double)alpha_i[1]; { /* Compute complex-extra = complex-extra * complex-double. */ double head_a0, tail_a0; double head_a1, tail_a1; double head_t1, tail_t1; double head_t2, tail_t2; head_a0 = head_sum[0]; tail_a0 = tail_sum[0]; head_a1 = head_sum[1]; tail_a1 = tail_sum[1]; /* real part */ { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; c11 = head_a0 * cd[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * cd[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; c11 = head_a1 * cd[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * cd[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp[0] = head_t1; tail_tmp[0] = tail_t1; /* imaginary part */ { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; c11 = head_a1 * cd[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * cd[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; c11 = head_a0 * cd[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * cd[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp[1] = head_t1; tail_tmp[1] = tail_t1; } } x_i[xj - inc_x] = head_tmp[0]; x_i[xj - inc_x + 1] = head_tmp[1]; } ti += inc_ti; } } } FPU_FIX_STOP; break; } } }
void BLAS_chemv2_c_s(enum blas_order_type order, enum blas_uplo_type uplo, int n, const void *alpha, const void *a, int lda, const float *x_head, const float *x_tail, int incx, const void *beta, const float *y, int incy) /* * Purpose * ======= * * This routines computes the matrix product: * * y <- alpha * A * (x_head + x_tail) + beta * y * * where A is a complex Hermitian matrix. * * Arguments * ========= * * order (input) enum blas_order_type * Storage format of input symmetric matrix A. * * uplo (input) enum blas_uplo_type * Determines which half of matrix A (upper or lower triangle) * is accessed. * * n (input) int * Dimension of A and size of vectors x, y. * * alpha (input) const void* * * a (input) void* * Matrix A. * * lda (input) int * Leading dimension of matrix A. * * x_head (input) float* * Vector x_head * * x_tail (input) float* * Vector x_tail * * incx (input) int * Stride for vector x. * * beta (input) const void* * * y (input) float* * Vector y. * * incy (input) int * Stride for vector y. * */ { /* Routine name */ const char routine_name[] = "BLAS_chemv2_c_s"; int i, j; int xi, yi, xi0, yi0; int aij, ai; int incai; int incaij, incaij2; const float *a_i = (float *) a; const float *x_head_i = x_head; const float *x_tail_i = x_tail; float *y_i = (float *) y; float *alpha_i = (float *) alpha; float *beta_i = (float *) beta; float a_elem[2]; float x_elem; float y_elem[2]; float diag_elem; float prod1[2]; float prod2[2]; float sum1[2]; float sum2[2]; float tmp1[2]; float tmp2[2]; float tmp3[2]; /* Test for no-op */ if (n <= 0) { return; } if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) { return; } /* Check for error conditions. */ if (n < 0) { BLAS_error(routine_name, -3, n, NULL); } if (lda < n) { BLAS_error(routine_name, -6, n, NULL); } if (incx == 0) { BLAS_error(routine_name, -9, incx, NULL); } if (incy == 0) { BLAS_error(routine_name, -12, incy, NULL); } if ((order == blas_colmajor && uplo == blas_upper) || (order == blas_rowmajor && uplo == blas_lower)) { incai = lda; incaij = 1; incaij2 = lda; } else { incai = 1; incaij = lda; incaij2 = 1; } incy *= 2; incai *= 2; incaij *= 2; incaij2 *= 2; xi0 = (incx > 0) ? 0 : ((-n + 1) * incx); yi0 = (incy > 0) ? 0 : ((-n + 1) * incy); /* The most general form, y <--- alpha * A * (x_head + x_tail) + beta * y */ if (uplo == blas_lower) { for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) { sum1[0] = sum1[1] = 0.0; sum2[0] = sum2[1] = 0.0; for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem = x_head_i[xi]; { prod1[0] = a_elem[0] * x_elem; prod1[1] = a_elem[1] * x_elem; } sum1[0] = sum1[0] + prod1[0]; sum1[1] = sum1[1] + prod1[1]; x_elem = x_tail_i[xi]; { prod2[0] = a_elem[0] * x_elem; prod2[1] = a_elem[1] * x_elem; } sum2[0] = sum2[0] + prod2[0]; sum2[1] = sum2[1] + prod2[1]; } diag_elem = a_i[aij]; x_elem = x_head_i[xi]; prod1[0] = x_elem * diag_elem; prod1[1] = 0.0; sum1[0] = sum1[0] + prod1[0]; sum1[1] = sum1[1] + prod1[1]; x_elem = x_tail_i[xi]; prod2[0] = x_elem * diag_elem; prod2[1] = 0.0; sum2[0] = sum2[0] + prod2[0]; sum2[1] = sum2[1] + prod2[1]; j++; aij += incaij2; xi += incx; for (; j < n; j++, aij += incaij2, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem = x_head_i[xi]; { prod1[0] = a_elem[0] * x_elem; prod1[1] = a_elem[1] * x_elem; } sum1[0] = sum1[0] + prod1[0]; sum1[1] = sum1[1] + prod1[1]; x_elem = x_tail_i[xi]; { prod2[0] = a_elem[0] * x_elem; prod2[1] = a_elem[1] * x_elem; } sum2[0] = sum2[0] + prod2[0]; sum2[1] = sum2[1] + prod2[1]; } sum1[0] = sum1[0] + sum2[0]; sum1[1] = sum1[1] + sum2[1]; { tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1]; tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1]; tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0]; } tmp3[0] = tmp1[0] + tmp2[0]; tmp3[1] = tmp1[1] + tmp2[1]; y_i[yi] = tmp3[0]; y_i[yi + 1] = tmp3[1]; } } else { /* uplo == blas_upper */ for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) { sum1[0] = sum1[1] = 0.0; sum2[0] = sum2[1] = 0.0; for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem = x_head_i[xi]; { prod1[0] = a_elem[0] * x_elem; prod1[1] = a_elem[1] * x_elem; } sum1[0] = sum1[0] + prod1[0]; sum1[1] = sum1[1] + prod1[1]; x_elem = x_tail_i[xi]; { prod2[0] = a_elem[0] * x_elem; prod2[1] = a_elem[1] * x_elem; } sum2[0] = sum2[0] + prod2[0]; sum2[1] = sum2[1] + prod2[1]; } diag_elem = a_i[aij]; x_elem = x_head_i[xi]; prod1[0] = x_elem * diag_elem; prod1[1] = 0.0; sum1[0] = sum1[0] + prod1[0]; sum1[1] = sum1[1] + prod1[1]; x_elem = x_tail_i[xi]; prod2[0] = x_elem * diag_elem; prod2[1] = 0.0; sum2[0] = sum2[0] + prod2[0]; sum2[1] = sum2[1] + prod2[1]; j++; aij += incaij2; xi += incx; for (; j < n; j++, aij += incaij2, xi += incx) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem = x_head_i[xi]; { prod1[0] = a_elem[0] * x_elem; prod1[1] = a_elem[1] * x_elem; } sum1[0] = sum1[0] + prod1[0]; sum1[1] = sum1[1] + prod1[1]; x_elem = x_tail_i[xi]; { prod2[0] = a_elem[0] * x_elem; prod2[1] = a_elem[1] * x_elem; } sum2[0] = sum2[0] + prod2[0]; sum2[1] = sum2[1] + prod2[1]; } sum1[0] = sum1[0] + sum2[0]; sum1[1] = sum1[1] + sum2[1]; { tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1]; tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1]; tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0]; } tmp3[0] = tmp1[0] + tmp2[0]; tmp3[1] = tmp1[1] + tmp2[1]; y_i[yi] = tmp3[0]; y_i[yi + 1] = tmp3[1]; } } } /* end BLAS_chemv2_c_s */
void BLAS_ztrmv_c_testgen(int norm, enum blas_order_type order, enum blas_uplo_type uplo, enum blas_trans_type trans, enum blas_diag_type diag, int n, void *alpha, int alpha_flag, void *T, int ldt, void *x, int *seed, double *head_r_true, double *tail_r_true) /* * Purpose * ======= * * Generates alpha, T and x, where T is a triangular matrix; and * computes r_true. * * Arguments * ========= * * norm (input) blas_norm_type * * order (input) blas_order_type * Order of T; row or column major * * uplo (input) blas_uplo_type * Whether T is upper or lower * * trans (input) blas_trans_type * No trans, trans, conj trans * * diag (input) blas_diag_type * non unit, unit * * n (input) int * Dimension of AP and the length of vector x * * alpha (input/output) void* * If alpha_flag = 1, alpha is input. * If alpha_flag = 0, alpha is output. * * alpha_flag (input) int * = 0 : alpha is free, and is output. * = 1 : alpha is fixed on input. * * T (output) void* * * x (input/output) void* * * seed (input/output) int * * head_r_true (output) double* * The leading part of the truth in double-double. * * tail_r_true (output) double* * The trailing part of the truth in double-double. * */ { double *x_i = (double *) x; float *T_i = (float *) T; double *alpha_i = (double *) alpha; double *x_vec; float *t_vec; double beta[2]; double r[2]; double head_r_true_elem[2], tail_r_true_elem[2]; double x_elem[2]; float t_elem[2]; int inc_tvec = 1, inc_xvec = 1; int xvec_i, tvec_j; int xi; int ti, tij; int inc_ti, inc_tij; int inc_xi; int i, j; r[0] = r[1] = 0.0; beta[0] = beta[1] = 0.0; inc_tvec *= 2; inc_xvec *= 2; t_vec = (float *) blas_malloc(n * sizeof(float) * 2); if (n > 0 && t_vec == NULL) { BLAS_error("blas_malloc", 0, 0, "malloc failed.\n"); }; x_vec = (double *) blas_malloc(n * sizeof(double) * 2); if (n > 0 && x_vec == NULL) { BLAS_error("blas_malloc", 0, 0, "malloc failed.\n"); }; if (trans == blas_no_trans) { if (uplo == blas_upper) { inc_xi = -1; if (order == blas_rowmajor) { inc_ti = -ldt; inc_tij = -1; } else { inc_ti = -1; inc_tij = -ldt; } } else { inc_xi = 1; if (order == blas_rowmajor) { inc_ti = ldt; inc_tij = 1; } else { inc_ti = 1; inc_tij = ldt; } } } else { if (uplo == blas_upper) { inc_xi = 1; if (order == blas_rowmajor) { inc_ti = 1; inc_tij = ldt; } else { inc_ti = ldt; inc_tij = 1; } } else { inc_xi = -1; if (order == blas_rowmajor) { inc_ti = -1; inc_tij = -ldt; } else { inc_ti = -ldt; inc_tij = -1; } } } inc_xi *= 2; inc_ti *= 2; inc_tij *= 2; /* Call dot_testgen n times. Each call will generate * one row of T and one element of x. */ ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); xi = (inc_xi > 0 ? 0 : -(n - 1) * inc_xi); xvec_i = 0; for (i = 0; i < n; i++) { /* Generate the i-th element of x_vec and all of t_vec. */ if (diag == blas_unit_diag) { /* Since we need alpha = beta, we fix alpha if alpha_flag = 0. */ if (i == 0 && alpha_flag == 0) { alpha_i[0] = xrand(seed); alpha_i[1] = xrand(seed); } BLAS_zdot_z_c_testgen(i, 0, i, norm, blas_no_conj, alpha_i, 1, alpha_i, 1, x_vec, t_vec, seed, r, head_r_true_elem, tail_r_true_elem); /* Copy generated t_vec to T. */ tvec_j = 0; tij = (inc_tij > 0 ? ti : ti - (n - 1) * inc_tij); for (j = 0; j < i; j++) { t_elem[0] = t_vec[tvec_j]; t_elem[1] = t_vec[tvec_j + 1]; if (trans == blas_conj_trans) { t_elem[1] = -t_elem[1]; } T_i[tij] = t_elem[0]; T_i[tij + 1] = t_elem[1]; tvec_j += inc_tvec; tij += inc_tij; } /* Set the diagonal element to 1. */ t_elem[0] = 1.0; t_elem[1] = 0.0; T_i[tij] = t_elem[0]; T_i[tij + 1] = t_elem[1]; /* Set x[i] to be r. */ x_i[xi] = r[0]; x_i[xi + 1] = r[1]; x_vec[xvec_i] = r[0]; x_vec[xvec_i + 1] = r[1]; } else { BLAS_zdot_z_c_testgen(i + 1, 0, i, norm, blas_no_conj, alpha, (i == 0 ? alpha_flag : 1), beta, 1, x_vec, t_vec, seed, r, head_r_true_elem, tail_r_true_elem); /* Copy generated t_vec to T. */ tvec_j = 0; tij = (inc_tij > 0 ? ti : ti - (n - 1) * inc_tij); for (j = 0; j <= i; j++) { t_elem[0] = t_vec[tvec_j]; t_elem[1] = t_vec[tvec_j + 1]; if (trans == blas_conj_trans) { t_elem[1] = -t_elem[1]; } T_i[tij] = t_elem[0]; T_i[tij + 1] = t_elem[1]; tvec_j += inc_tvec; tij += inc_tij; } /* Copy generated x_vec[i] to appropriate position in x. */ x_elem[0] = x_vec[xvec_i]; x_elem[1] = x_vec[xvec_i + 1]; x_i[xi] = x_elem[0]; x_i[xi + 1] = x_elem[1]; } /* Copy r_true */ head_r_true[xi] = head_r_true_elem[0]; head_r_true[xi + 1] = head_r_true_elem[1]; tail_r_true[xi] = tail_r_true_elem[0]; tail_r_true[xi + 1] = tail_r_true_elem[1]; xvec_i += inc_xvec; xi += inc_xi; ti += inc_ti; } blas_free(x_vec); blas_free(t_vec); }
void BLAS_dgbmv_s_d_x(enum blas_order_type order, enum blas_trans_type trans, int m, int n, int kl, int ku, double alpha, const float *a, int lda, const double *x, int incx, double beta, double *y, int incy, enum blas_prec_type prec) /* * Purpose * ======= * * gbmv computes y = alpha * A * x + beta * y, where * * A is a m x n banded matrix * x is a n x 1 vector * y is a m x 1 vector * alpha and beta are scalars * * * Arguments * ========= * * order (input) blas_order_type * Order of AP; row or column major * * trans (input) blas_trans_type * Transpose of AB; no trans, * trans, or conjugate trans * * m (input) int * Dimension of AB * * n (input) int * Dimension of AB and the length of vector x * * kl (input) int * Number of lower diagnols of AB * * ku (input) int * Number of upper diagnols of AB * * alpha (input) double * * AB (input) float* * * lda (input) int * Leading dimension of AB * lda >= ku + kl + 1 * * x (input) double* * * incx (input) int * The stride for vector x. * * beta (input) double * * y (input/output) double* * * incy (input) int * The stride for vector y. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * * * LOCAL VARIABLES * =============== * * As an example, these variables are described on the mxn, column * major, banded matrix described in section 2.2.3 of the specification * * astart indexes first element in A where computation begins * * incai1 indexes first element in row where row is less than lbound * * incai2 indexes first element in row where row exceeds lbound * * lbound denotes the number of rows before first element shifts * * rbound denotes the columns where there is blank space * * ra index of the rightmost element for a given row * * la index of leftmost elements for a given row * * ra - la width of a row * * rbound * la ra ____|_____ * | | | | * | a00 a01 * * * * lbound -| a10 a11 a12 * * * | a20 a21 a22 a23 * * * a31 a32 a33 a34 * * * a42 a43 a44 * * Varations on order and transpose have been implemented by modifying these * local variables. * */ { static const char routine_name[] = "BLAS_dgbmv_s_d_x"; switch (prec) { case blas_prec_single: case blas_prec_double: case blas_prec_indigenous: { int ky, iy, kx, jx, j, i, rbound, lbound, ra, la, lenx, leny; int incaij, aij, incai1, incai2, astart, ai; double *y_i = y; const float *a_i = a; const double *x_i = x; double alpha_i = alpha; double beta_i = beta; double tmp1; double tmp2; double result; double sum; double prod; float a_elem; double x_elem; double y_elem; if (order != blas_colmajor && order != blas_rowmajor) BLAS_error(routine_name, -1, order, NULL); if (trans != blas_no_trans && trans != blas_trans && trans != blas_conj_trans) { BLAS_error(routine_name, -2, trans, NULL); } if (m < 0) BLAS_error(routine_name, -3, m, NULL); if (n < 0) BLAS_error(routine_name, -4, n, NULL); if (kl < 0 || kl >= m) BLAS_error(routine_name, -5, kl, NULL); if (ku < 0 || ku >= n) BLAS_error(routine_name, -6, ku, NULL); if (lda < kl + ku + 1) BLAS_error(routine_name, -9, lda, NULL); if (incx == 0) BLAS_error(routine_name, -11, incx, NULL); if (incy == 0) BLAS_error(routine_name, -14, incy, NULL); if ((m == 0) || (n == 0) || (((alpha_i == 0.0) && (beta_i == 1.0)))) return; if (trans == blas_no_trans) { lenx = n; leny = m; } else { /* change back */ lenx = m; leny = n; } if (incx < 0) { kx = -(lenx - 1) * incx; } else { kx = 0; } if (incy < 0) { ky = -(leny - 1) * incy; } else { ky = 0; } /* if alpha = 0, return y = y*beta */ if ((order == blas_colmajor) && (trans == blas_no_trans)) { astart = ku; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else if ((order == blas_colmajor) && (trans != blas_no_trans)) { astart = ku; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = ku; rbound = m - kl - 1; ra = kl; } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) { astart = kl; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else { /* rowmajor and blas_trans */ astart = kl; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = ku; rbound = m - kl - 1; ra = kl; } la = 0; ai = astart; iy = ky; for (i = 0; i < leny; i++) { sum = 0.0; aij = ai; jx = kx; for (j = ra - la; j >= 0; j--) { x_elem = x_i[jx]; a_elem = a_i[aij]; prod = x_elem * a_elem; sum = sum + prod; aij += incaij; jx += incx; } tmp1 = sum * alpha_i; y_elem = y_i[iy]; tmp2 = beta_i * y_elem; result = tmp1 + tmp2; y_i[iy] = result; iy += incy; if (i >= lbound) { kx += incx; ai += incai2; la++; } else { ai += incai1; } if (i < rbound) { ra++; } } } break; case blas_prec_extra: { int ky, iy, kx, jx, j, i, rbound, lbound, ra, la, lenx, leny; int incaij, aij, incai1, incai2, astart, ai; double *y_i = y; const float *a_i = a; const double *x_i = x; double alpha_i = alpha; double beta_i = beta; double head_tmp1, tail_tmp1; double head_tmp2, tail_tmp2; double result; double head_sum, tail_sum; double head_prod, tail_prod; float a_elem; double x_elem; double y_elem; FPU_FIX_DECL; if (order != blas_colmajor && order != blas_rowmajor) BLAS_error(routine_name, -1, order, NULL); if (trans != blas_no_trans && trans != blas_trans && trans != blas_conj_trans) { BLAS_error(routine_name, -2, trans, NULL); } if (m < 0) BLAS_error(routine_name, -3, m, NULL); if (n < 0) BLAS_error(routine_name, -4, n, NULL); if (kl < 0 || kl >= m) BLAS_error(routine_name, -5, kl, NULL); if (ku < 0 || ku >= n) BLAS_error(routine_name, -6, ku, NULL); if (lda < kl + ku + 1) BLAS_error(routine_name, -9, lda, NULL); if (incx == 0) BLAS_error(routine_name, -11, incx, NULL); if (incy == 0) BLAS_error(routine_name, -14, incy, NULL); if ((m == 0) || (n == 0) || (((alpha_i == 0.0) && (beta_i == 1.0)))) return; if (trans == blas_no_trans) { lenx = n; leny = m; } else { /* change back */ lenx = m; leny = n; } if (incx < 0) { kx = -(lenx - 1) * incx; } else { kx = 0; } if (incy < 0) { ky = -(leny - 1) * incy; } else { ky = 0; } FPU_FIX_START; /* if alpha = 0, return y = y*beta */ if ((order == blas_colmajor) && (trans == blas_no_trans)) { astart = ku; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else if ((order == blas_colmajor) && (trans != blas_no_trans)) { astart = ku; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = ku; rbound = m - kl - 1; ra = kl; } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) { astart = kl; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else { /* rowmajor and blas_trans */ astart = kl; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = ku; rbound = m - kl - 1; ra = kl; } la = 0; ai = astart; iy = ky; for (i = 0; i < leny; i++) { head_sum = tail_sum = 0.0; aij = ai; jx = kx; for (j = ra - la; j >= 0; j--) { x_elem = x_i[jx]; a_elem = a_i[aij]; { double dt = (double) a_elem; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = x_elem * split; a1 = con - x_elem; a1 = con - a1; a2 = x_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = x_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } aij += incaij; jx += incx; } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum * split; a11 = con - head_sum; a11 = con - a11; a21 = head_sum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_sum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } y_elem = y_i[iy]; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = beta_i * split; a1 = con - beta_i; a1 = con - a1; a2 = beta_i - a1; con = y_elem * split; b1 = con - y_elem; b1 = con - b1; b2 = y_elem - b1; head_tmp2 = beta_i * y_elem; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ result = t1 + t2; } y_i[iy] = result; iy += incy; if (i >= lbound) { kx += incx; ai += incai2; la++; } else { ai += incai1; } if (i < rbound) { ra++; } } FPU_FIX_STOP; } break; } } /* end GEMV_NAME(d, s, d, _x) */
void BLAS_cgbmv2_x(enum blas_order_type order, enum blas_trans_type trans, int m , int n, int kl, int ku, const void *alpha, const void *a, int lda, const void *head_x, const void *tail_x, int incx, const void *beta, void *y, int incy, enum blas_prec_type prec) /* * Purpose * ======= * * This routines computes the matrix product: * * y <- alpha * op(A) * (x_head + x_tail) + beta * y * * where * * A is a m x n banded matrix * x is a n x 1 vector * y is a m x 1 vector * alpha and beta are scalars * * Arguments * ========= * * order (input) blas_order_type * Order of AB; row or column major * * trans (input) blas_trans_type * Transpose of AB; no trans, * trans, or conjugate trans * * m (input) int * Dimension of AB * * n (input) int * Dimension of AB and the length of vector x and z * * kl (input) int * Number of lower diagnols of AB * * ku (input) int * Number of upper diagnols of AB * * alpha (input) const void* * * AB (input) void* * * lda (input) int * Leading dimension of AB * lda >= ku + kl + 1 * * head_x * tail_x (input) void* * * incx (input) int * The stride for vector x. * * beta (input) const void* * * y (input) const void* * * incy (input) int * The stride for vector y. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * * * LOCAL VARIABLES * =============== * * As an example, these variables are described on the mxn, column * major, banded matrix described in section 2.2.3 of the specification * * astart indexes first element in A where computation begins * * incai1 indexes first element in row where row is less than lbound * * incai2 indexes first element in row where row exceeds lbound * * lbound denotes the number of rows before first element shifts * * rbound denotes the columns where there is blank space * * ra index of the rightmost element for a given row * * la index of leftmost elements for a given row * * ra - la width of a row * * rbound * la ra ____|_____ * | | | | * | a00 a01 * * * * lbound -| a10 a11 a12 * * * | a20 a21 a22 a23 * * * a31 a32 a33 a34 * * * a42 a43 a44 * * Varations on order and transpose have been implemented by modifying these * local variables. * */ { static const char routine_name[] = "BLAS_cgbmv2_x"; switch (prec) { case blas_prec_single:{ int iy0 , iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny; int incaij, aij, incai1, incai2, astart, ai; float *y_i = (float *)y; const float *a_i = (float *)a; const float *head_x_i = (float *)head_x; const float *tail_x_i = (float *)tail_x; float *alpha_i = (float *)alpha; float *beta_i = (float *)beta; float tmp1[2]; float tmp2[2]; float tmp3[2]; float tmp4[2]; float result[2]; float sum1[2]; float sum2[2]; float prod[2]; float a_elem[2]; float x_elem[2]; float y_elem[2]; if (order != blas_colmajor && order != blas_rowmajor) BLAS_error(routine_name, -1, order, NULL); if (trans != blas_no_trans && trans != blas_trans && trans != blas_conj_trans) { BLAS_error(routine_name, -2, trans, NULL); } if (m < 0) BLAS_error(routine_name, -3, m, NULL); if (n < 0) BLAS_error(routine_name, -4, n, NULL); if (kl < 0 || kl >= m) BLAS_error(routine_name, -5, kl, NULL); if (ku < 0 || ku >= n) BLAS_error(routine_name, -6, ku, NULL); if (lda < kl + ku + 1) BLAS_error(routine_name, -9, lda, NULL); if (incx == 0) BLAS_error(routine_name, -12, incx, NULL); if (incy == 0) BLAS_error(routine_name, -15, incy, NULL); if (m == 0 || n == 0) return; if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0) && ((beta_i[0] == 1.0 && beta_i[1] == 0.0))) return; if (trans == blas_no_trans) { lenx = n; leny = m; } else { lenx = m; leny = n; } ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx; iy0 = (incy > 0) ? 0 : -(leny - 1) * incy; /* if alpha = 0, return y = y*beta */ if ((order == blas_colmajor) && (trans == blas_no_trans)) { astart = ku; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else if ((order == blas_colmajor) && (trans != blas_no_trans)) { astart = ku; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = ku; rbound = m - kl - 1; ra = kl; } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) { astart = kl; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else { /* rowmajor and blas_trans */ astart = kl; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = ku; rbound = m - kl - 1; ra = kl; } incx *= 2; incy *= 2; incaij *= 2; incai1 *= 2; incai2 *= 2; astart *= 2; iy0 *= 2; ix0 *= 2; la = 0; ai = astart; iy = iy0; for (i = 0; i < leny; i++) { sum1[0] = sum1[1] = 0.0; sum2[0] = sum2[1] = 0.0; aij = ai; jx = ix0; if (trans != blas_conj_trans) { for (j = ra - la; j >= 0; j--) { x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; { prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1]; prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0]; } sum1[0] = sum1[0] + prod[0]; sum1[1] = sum1[1] + prod[1]; x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1]; prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0]; } sum2[0] = sum2[0] + prod[0]; sum2[1] = sum2[1] + prod[1]; aij += incaij; jx += incx; } } else { for (j = ra - la; j >= 0; j--) { x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; { prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1]; prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0]; } sum1[0] = sum1[0] + prod[0]; sum1[1] = sum1[1] + prod[1]; x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1]; prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0]; } sum2[0] = sum2[0] + prod[0]; sum2[1] = sum2[1] + prod[1]; aij += incaij; jx += incx; } } { tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1]; tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0]; } { tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1]; tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0]; } tmp3[0] = tmp1[0] + tmp2[0]; tmp3[1] = tmp1[1] + tmp2[1]; y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1]; tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0]; } result[0] = tmp4[0] + tmp3[0]; result[1] = tmp4[1] + tmp3[1]; y_i[iy] = result[0]; y_i[iy + 1] = result[1]; iy += incy; if (i >= lbound) { ix0 += incx; ai += incai2; la++; } else { ai += incai1; } if (i < rbound) { ra++; } } break; } case blas_prec_double: case blas_prec_indigenous: { int iy0 , iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny; int incaij, aij, incai1, incai2, astart, ai; float *y_i = (float *)y; const float *a_i = (float *)a; const float *head_x_i = (float *)head_x; const float *tail_x_i = (float *)tail_x; float *alpha_i = (float *)alpha; float *beta_i = (float *)beta; double tmp1[2]; double tmp2[2]; double tmp3[2]; double tmp4[2]; float result[2]; double sum1[2]; double sum2[2]; double prod[2]; float a_elem[2]; float x_elem[2]; float y_elem[2]; if (order != blas_colmajor && order != blas_rowmajor) BLAS_error(routine_name, -1, order, NULL); if (trans != blas_no_trans && trans != blas_trans && trans != blas_conj_trans) { BLAS_error(routine_name, -2, trans, NULL); } if (m < 0) BLAS_error(routine_name, -3, m, NULL); if (n < 0) BLAS_error(routine_name, -4, n, NULL); if (kl < 0 || kl >= m) BLAS_error(routine_name, -5, kl, NULL); if (ku < 0 || ku >= n) BLAS_error(routine_name, -6, ku, NULL); if (lda < kl + ku + 1) BLAS_error(routine_name, -9, lda, NULL); if (incx == 0) BLAS_error(routine_name, -12, incx, NULL); if (incy == 0) BLAS_error(routine_name, -15, incy, NULL); if (m == 0 || n == 0) return; if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0) && ((beta_i[0] == 1.0 && beta_i[1] == 0.0))) return; if (trans == blas_no_trans) { lenx = n; leny = m; } else { lenx = m; leny = n; } ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx; iy0 = (incy > 0) ? 0 : -(leny - 1) * incy; /* if alpha = 0, return y = y*beta */ if ((order == blas_colmajor) && (trans == blas_no_trans)) { astart = ku; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else if ((order == blas_colmajor) && (trans != blas_no_trans)) { astart = ku; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = ku; rbound = m - kl - 1; ra = kl; } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) { astart = kl; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else { /* rowmajor and blas_trans */ astart = kl; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = ku; rbound = m - kl - 1; ra = kl; } incx *= 2; incy *= 2; incaij *= 2; incai1 *= 2; incai2 *= 2; astart *= 2; iy0 *= 2; ix0 *= 2; la = 0; ai = astart; iy = iy0; for (i = 0; i < leny; i++) { sum1[0] = sum1[1] = 0.0; sum2[0] = sum2[1] = 0.0; aij = ai; jx = ix0; if (trans != blas_conj_trans) { for (j = ra - la; j >= 0; j--) { x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; { prod[0] = (double)x_elem[0] * a_elem[0] - (double)x_elem[1] * a_elem[1]; prod[1] = (double)x_elem[0] * a_elem[1] + (double)x_elem[1] * a_elem[0]; } sum1[0] = sum1[0] + prod[0]; sum1[1] = sum1[1] + prod[1]; x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { prod[0] = (double)x_elem[0] * a_elem[0] - (double)x_elem[1] * a_elem[1]; prod[1] = (double)x_elem[0] * a_elem[1] + (double)x_elem[1] * a_elem[0]; } sum2[0] = sum2[0] + prod[0]; sum2[1] = sum2[1] + prod[1]; aij += incaij; jx += incx; } } else { for (j = ra - la; j >= 0; j--) { x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; { prod[0] = (double)x_elem[0] * a_elem[0] - (double)x_elem[1] * a_elem[1]; prod[1] = (double)x_elem[0] * a_elem[1] + (double)x_elem[1] * a_elem[0]; } sum1[0] = sum1[0] + prod[0]; sum1[1] = sum1[1] + prod[1]; x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { prod[0] = (double)x_elem[0] * a_elem[0] - (double)x_elem[1] * a_elem[1]; prod[1] = (double)x_elem[0] * a_elem[1] + (double)x_elem[1] * a_elem[0]; } sum2[0] = sum2[0] + prod[0]; sum2[1] = sum2[1] + prod[1]; aij += incaij; jx += incx; } } { tmp1[0] = (double)sum1[0] * alpha_i[0] - (double)sum1[1] * alpha_i[1]; tmp1[1] = (double)sum1[0] * alpha_i[1] + (double)sum1[1] * alpha_i[0]; } { tmp2[0] = (double)sum2[0] * alpha_i[0] - (double)sum2[1] * alpha_i[1]; tmp2[1] = (double)sum2[0] * alpha_i[1] + (double)sum2[1] * alpha_i[0]; } tmp3[0] = tmp1[0] + tmp2[0]; tmp3[1] = tmp1[1] + tmp2[1]; y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp4[0] = (double)beta_i[0] * y_elem[0] - (double)beta_i[1] * y_elem[1]; tmp4[1] = (double)beta_i[0] * y_elem[1] + (double)beta_i[1] * y_elem[0]; } result[0] = tmp4[0] + tmp3[0]; result[1] = tmp4[1] + tmp3[1]; y_i[iy] = result[0]; y_i[iy + 1] = result[1]; iy += incy; if (i >= lbound) { ix0 += incx; ai += incai2; la++; } else { ai += incai1; } if (i < rbound) { ra++; } } } break; case blas_prec_extra: { int iy0 , iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx, leny; int incaij, aij, incai1, incai2, astart, ai; float *y_i = (float *)y; const float *a_i = (float *)a; const float *head_x_i = (float *)head_x; const float *tail_x_i = (float *)tail_x; float *alpha_i = (float *)alpha; float *beta_i = (float *)beta; double head_tmp1[2], tail_tmp1[2]; double head_tmp2[2], tail_tmp2[2]; double head_tmp3[2], tail_tmp3[2]; double head_tmp4[2], tail_tmp4[2]; float result[2]; double head_sum1[2], tail_sum1[2]; double head_sum2[2], tail_sum2[2]; double head_prod[2], tail_prod[2]; float a_elem[2]; float x_elem[2]; float y_elem[2]; FPU_FIX_DECL; if (order != blas_colmajor && order != blas_rowmajor) BLAS_error(routine_name, -1, order, NULL); if (trans != blas_no_trans && trans != blas_trans && trans != blas_conj_trans) { BLAS_error(routine_name, -2, trans, NULL); } if (m < 0) BLAS_error(routine_name, -3, m, NULL); if (n < 0) BLAS_error(routine_name, -4, n, NULL); if (kl < 0 || kl >= m) BLAS_error(routine_name, -5, kl, NULL); if (ku < 0 || ku >= n) BLAS_error(routine_name, -6, ku, NULL); if (lda < kl + ku + 1) BLAS_error(routine_name, -9, lda, NULL); if (incx == 0) BLAS_error(routine_name, -12, incx, NULL); if (incy == 0) BLAS_error(routine_name, -15, incy, NULL); if (m == 0 || n == 0) return; if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0) && ((beta_i[0] == 1.0 && beta_i[1] == 0.0))) return; if (trans == blas_no_trans) { lenx = n; leny = m; } else { lenx = m; leny = n; } ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx; iy0 = (incy > 0) ? 0 : -(leny - 1) * incy; FPU_FIX_START; /* if alpha = 0, return y = y*beta */ if ((order == blas_colmajor) && (trans == blas_no_trans)) { astart = ku; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else if ((order == blas_colmajor) && (trans != blas_no_trans)) { astart = ku; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = ku; rbound = m - kl - 1; ra = kl; } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) { astart = kl; incai1 = lda - 1; incai2 = lda; incaij = 1; lbound = kl; rbound = n - ku - 1; ra = ku; } else { /* rowmajor and blas_trans */ astart = kl; incai1 = 1; incai2 = lda; incaij = lda - 1; lbound = ku; rbound = m - kl - 1; ra = kl; } incx *= 2; incy *= 2; incaij *= 2; incai1 *= 2; incai2 *= 2; astart *= 2; iy0 *= 2; ix0 *= 2; la = 0; ai = astart; iy = iy0; for (i = 0; i < leny; i++) { head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0; head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0; aij = ai; jx = ix0; if (trans != blas_conj_trans) { for (j = ra - la; j >= 0; j--) { x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; { double head_e1, tail_e1; double d1; double d2; /* Real part */ d1 = (double)x_elem[0] * a_elem[0]; d2 = (double)-x_elem[1] * a_elem[1]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_prod[0] = head_e1; tail_prod[0] = tail_e1; /* imaginary part */ d1 = (double)x_elem[0] * a_elem[1]; d2 = (double)x_elem[1] * a_elem[0]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_prod[1] = head_e1; tail_prod[1] = tail_e1; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum1[0]; tail_a = tail_sum1[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum1[0] = head_t; tail_sum1[0] = tail_t; /* Imaginary part */ head_a = head_sum1[1]; tail_a = tail_sum1[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum1[1] = head_t; tail_sum1[1] = tail_t; } x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { double head_e1, tail_e1; double d1; double d2; /* Real part */ d1 = (double)x_elem[0] * a_elem[0]; d2 = (double)-x_elem[1] * a_elem[1]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_prod[0] = head_e1; tail_prod[0] = tail_e1; /* imaginary part */ d1 = (double)x_elem[0] * a_elem[1]; d2 = (double)x_elem[1] * a_elem[0]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_prod[1] = head_e1; tail_prod[1] = tail_e1; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum2[0]; tail_a = tail_sum2[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum2[0] = head_t; tail_sum2[0] = tail_t; /* Imaginary part */ head_a = head_sum2[1]; tail_a = tail_sum2[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum2[1] = head_t; tail_sum2[1] = tail_t; } aij += incaij; jx += incx; } } else { for (j = ra - la; j >= 0; j--) { x_elem[0] = head_x_i[jx]; x_elem[1] = head_x_i[jx + 1]; a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; { double head_e1, tail_e1; double d1; double d2; /* Real part */ d1 = (double)x_elem[0] * a_elem[0]; d2 = (double)-x_elem[1] * a_elem[1]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_prod[0] = head_e1; tail_prod[0] = tail_e1; /* imaginary part */ d1 = (double)x_elem[0] * a_elem[1]; d2 = (double)x_elem[1] * a_elem[0]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_prod[1] = head_e1; tail_prod[1] = tail_e1; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum1[0]; tail_a = tail_sum1[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum1[0] = head_t; tail_sum1[0] = tail_t; /* Imaginary part */ head_a = head_sum1[1]; tail_a = tail_sum1[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum1[1] = head_t; tail_sum1[1] = tail_t; } x_elem[0] = tail_x_i[jx]; x_elem[1] = tail_x_i[jx + 1]; { double head_e1, tail_e1; double d1; double d2; /* Real part */ d1 = (double)x_elem[0] * a_elem[0]; d2 = (double)-x_elem[1] * a_elem[1]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_prod[0] = head_e1; tail_prod[0] = tail_e1; /* imaginary part */ d1 = (double)x_elem[0] * a_elem[1]; d2 = (double)x_elem[1] * a_elem[0]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_prod[1] = head_e1; tail_prod[1] = tail_e1; } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_sum2[0]; tail_a = tail_sum2[0]; head_b = head_prod[0]; tail_b = tail_prod[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum2[0] = head_t; tail_sum2[0] = tail_t; /* Imaginary part */ head_a = head_sum2[1]; tail_a = tail_sum2[1]; head_b = head_prod[1]; tail_b = tail_prod[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_sum2[1] = head_t; tail_sum2[1] = tail_t; } aij += incaij; jx += incx; } } { double cd [2]; cd[0] = (double)alpha_i[0]; cd[1] = (double)alpha_i[1]; { /* Compute complex-extra = complex-extra * complex-double. */ double head_a0, tail_a0; double head_a1, tail_a1; double head_t1, tail_t1; double head_t2, tail_t2; head_a0 = head_sum1[0]; tail_a0 = tail_sum1[0]; head_a1 = head_sum1[1]; tail_a1 = tail_sum1[1]; /* real part */ { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; c11 = head_a0 * cd[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * cd[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; c11 = head_a1 * cd[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * cd[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp1[0] = head_t1; tail_tmp1[0] = tail_t1; /* imaginary part */ { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; c11 = head_a1 * cd[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * cd[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; c11 = head_a0 * cd[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * cd[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp1[1] = head_t1; tail_tmp1[1] = tail_t1; } } { double cd [2]; cd[0] = (double)alpha_i[0]; cd[1] = (double)alpha_i[1]; { /* Compute complex-extra = complex-extra * complex-double. */ double head_a0, tail_a0; double head_a1, tail_a1; double head_t1, tail_t1; double head_t2, tail_t2; head_a0 = head_sum2[0]; tail_a0 = tail_sum2[0]; head_a1 = head_sum2[1]; tail_a1 = tail_sum2[1]; /* real part */ { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; c11 = head_a0 * cd[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * cd[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; c11 = head_a1 * cd[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * cd[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } head_t2 = -head_t2; tail_t2 = -tail_t2; { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp2[0] = head_t1; tail_tmp2[0] = tail_t1; /* imaginary part */ { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a1 * split; a11 = con - head_a1; a11 = con - a11; a21 = head_a1 - a11; con = cd[0] * split; b1 = con - cd[0]; b1 = con - b1; b2 = cd[0] - b1; c11 = head_a1 * cd[0]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a1 * cd[0]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_a0 * split; a11 = con - head_a0; a11 = con - a11; a21 = head_a0 - a11; con = cd[1] * split; b1 = con - cd[1]; b1 = con - b1; b2 = cd[1] - b1; c11 = head_a0 * cd[1]; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_a0 * cd[1]; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_t2 = t1 + t2; tail_t2 = t2 - (head_t2 - t1); } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_t1 + head_t2; bv = s1 - head_t1; s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_t1 + tail_t2; bv = t1 - tail_t1; t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t1 = t1 + t2; tail_t1 = t2 - (head_t1 - t1); } head_tmp2[1] = head_t1; tail_tmp2[1] = tail_t1; } } { double head_t, tail_t; double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_tmp1[0]; tail_a = tail_tmp1[0]; head_b = head_tmp2[0]; tail_b = tail_tmp2[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmp3[0] = head_t; tail_tmp3[0] = tail_t; /* Imaginary part */ head_a = head_tmp1[1]; tail_a = tail_tmp1[1]; head_b = head_tmp2[1]; tail_b = tail_tmp2[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_t = t1 + t2; tail_t = t2 - (head_t - t1); } head_tmp3[1] = head_t; tail_tmp3[1] = tail_t; } y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { double head_e1, tail_e1; double d1; double d2; /* Real part */ d1 = (double)beta_i[0] * y_elem[0]; d2 = (double)-beta_i[1] * y_elem[1]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_tmp4[0] = head_e1; tail_tmp4[0] = tail_e1; /* imaginary part */ d1 = (double)beta_i[0] * y_elem[1]; d2 = (double)beta_i[1] * y_elem[0]; { /* Compute double-double = double + double. */ double e , t1, t2; /* Knuth trick. */ t1 = d1 + d2; e = t1 - d1; t2 = ((d2 - e) + (d1 - (t1 - e))); /* The result is t1 + t2, after normalization. */ head_e1 = t1 + t2; tail_e1 = t2 - (head_e1 - t1); } head_tmp4[1] = head_e1; tail_tmp4[1] = tail_e1; } { double head_a, tail_a; double head_b, tail_b; /* Real part */ head_a = head_tmp4[0]; tail_a = tail_tmp4[0]; head_b = head_tmp3[0]; tail_b = tail_tmp3[0]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ result[0] = t1 + t2; } /* Imaginary part */ head_a = head_tmp4[1]; tail_a = tail_tmp4[1]; head_b = head_tmp3[1]; tail_b = tail_tmp3[1]; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_a + head_b; bv = s1 - head_a; s2 = ((head_b - bv) + (head_a - (s1 - bv))); /* Add two lo words. */ t1 = tail_a + tail_b; bv = t1 - tail_a; t2 = ((tail_b - bv) + (tail_a - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ result[1] = t1 + t2; } } y_i[iy] = result[0]; y_i[iy + 1] = result[1]; iy += incy; if (i >= lbound) { ix0 += incx; ai += incai2; la++; } else { ai += incai1; } if (i < rbound) { ra++; } } FPU_FIX_STOP; } break; } } /* end BLAS_cgbmv2_x */
void BLAS_dgemv2_d_s_x(enum blas_order_type order, enum blas_trans_type trans, int m , int n, double alpha, const double *a, int lda, const float *head_x, const float *tail_x, int incx, double beta , double *y, int incy, enum blas_prec_type prec) /* * Purpose * ======= * * Computes y = alpha * op(A) * head_x + alpha * op(A) * tail_x + beta * y, * where A is a general matrix. * * Arguments * ========= * * order (input) blas_order_type * Order of A; row or column major * * trans (input) blas_trans_type * Transpose of A: no trans, trans, or conjugate trans * * m (input) int * Dimension of A * * n (input) int * Dimension of A and the length of vector x and z * * alpha (input) double * * A (input) const double* * * lda (input) int * Leading dimension of A * * head_x * tail_x (input) const float* * * incx (input) int * The stride for vector x. * * beta (input) double * * y (input) const double* * * incy (input) int * The stride for vector y. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * */ { static const char routine_name[] = "BLAS_dgemv2_d_s_x"; switch (prec) { case blas_prec_single: case blas_prec_double: case blas_prec_indigenous:{ int i , j; int iy , jx, kx, ky; int lenx , leny; int ai , aij; int incai, incaij; const double *a_i = a; const float *head_x_i = head_x; const float *tail_x_i = tail_x; double *y_i = y; double alpha_i = alpha; double beta_i = beta; double a_elem; float x_elem; double y_elem; double prod; double sum; double sum2; double tmp1; double tmp2; /* all error calls */ if (m < 0) BLAS_error(routine_name, -3, m, 0); else if (n <= 0) BLAS_error(routine_name, -4, n, 0); else if (incx == 0) BLAS_error(routine_name, -10, incx, 0); else if (incy == 0) BLAS_error(routine_name, -13, incy, 0); if ((order == blas_rowmajor) && (trans == blas_no_trans)) { lenx = n; leny = m; incai = lda; incaij = 1; } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) { lenx = m; leny = n; incai = 1; incaij = lda; } else if ((order == blas_colmajor) && (trans == blas_no_trans)) { lenx = n; leny = m; incai = 1; incaij = lda; } else { /* colmajor and blas_trans */ lenx = m; leny = n; incai = lda; incaij = 1; } if (lda < leny) BLAS_error(routine_name, -7, lda, NULL); if (incx > 0) kx = 0; else kx = (1 - lenx) * incx; if (incy > 0) ky = 0; else ky = (1 - leny) * incy; /* No extra-precision needed for alpha = 0 */ if (alpha_i == 0.0) { if (beta_i == 0.0) { iy = ky; for (i = 0; i < leny; i++) { y_i[iy] = 0.0; iy += incy; } } else if (!(beta_i == 0.0)) { iy = ky; for (i = 0; i < leny; i++) { y_elem = y_i[iy]; tmp1 = y_elem * beta_i; y_i[iy] = tmp1; iy += incy; } } } else { /* alpha != 0 */ /* * if beta = 0, we can save m multiplies: y = alpha*A*head_x + * alpha*A*tail_x */ if (beta_i == 0.0) { if (alpha_i == 1.0) { /* save m more multiplies if alpha = 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum = 0.0; sum2 = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem = head_x_i[jx]; prod = a_elem * x_elem; sum = sum + prod; x_elem = tail_x_i[jx]; prod = a_elem * x_elem; sum2 = sum2 + prod; aij += incaij; jx += incx; } sum = sum + sum2; y_i[iy] = sum; ai += incai; iy += incy; } /* end for */ } else { /* alpha != 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum = 0.0; sum2 = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem = head_x_i[jx]; prod = a_elem * x_elem; sum = sum + prod; x_elem = tail_x_i[jx]; prod = a_elem * x_elem; sum2 = sum2 + prod; aij += incaij; jx += incx; } tmp1 = sum * alpha_i; tmp2 = sum2 * alpha_i; tmp1 = tmp1 + tmp2; y_i[iy] = tmp1; ai += incai; iy += incy; } } } else { /* beta != 0 */ if (alpha_i == 1.0) { /* save m multiplies if alpha = 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum = 0.0;; sum2 = 0.0;; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem = head_x_i[jx]; prod = a_elem * x_elem; sum = sum + prod; x_elem = tail_x_i[jx]; prod = a_elem * x_elem; sum2 = sum2 + prod; aij += incaij; jx += incx; } sum = sum + sum2; y_elem = y_i[iy]; tmp1 = y_elem * beta_i; tmp2 = sum + tmp1; y_i[iy] = tmp2; ai += incai; iy += incy; } } else { /* alpha != 1, the most general form: y = * alpha*A*head_x + alpha*A*tail_x + beta*y */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum = 0.0;; sum2 = 0.0;; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem = head_x_i[jx]; prod = a_elem * x_elem; sum = sum + prod; x_elem = tail_x_i[jx]; prod = a_elem * x_elem; sum2 = sum2 + prod; aij += incaij; jx += incx; } tmp1 = sum * alpha_i; tmp2 = sum2 * alpha_i; tmp1 = tmp1 + tmp2; y_elem = y_i[iy]; tmp2 = y_elem * beta_i; tmp1 = tmp1 + tmp2; y_i[iy] = tmp1; ai += incai; iy += incy; } } } } break; } case blas_prec_extra:{ int i , j; int iy , jx, kx, ky; int lenx , leny; int ai , aij; int incai, incaij; const double *a_i = a; const float *head_x_i = head_x; const float *tail_x_i = tail_x; double *y_i = y; double alpha_i = alpha; double beta_i = beta; double a_elem; float x_elem; double y_elem; double head_prod, tail_prod; double head_sum, tail_sum; double head_sum2, tail_sum2; double head_tmp1, tail_tmp1; double head_tmp2, tail_tmp2; FPU_FIX_DECL; /* all error calls */ if (m < 0) BLAS_error(routine_name, -3, m, 0); else if (n <= 0) BLAS_error(routine_name, -4, n, 0); else if (incx == 0) BLAS_error(routine_name, -10, incx, 0); else if (incy == 0) BLAS_error(routine_name, -13, incy, 0); if ((order == blas_rowmajor) && (trans == blas_no_trans)) { lenx = n; leny = m; incai = lda; incaij = 1; } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) { lenx = m; leny = n; incai = 1; incaij = lda; } else if ((order == blas_colmajor) && (trans == blas_no_trans)) { lenx = n; leny = m; incai = 1; incaij = lda; } else { /* colmajor and blas_trans */ lenx = m; leny = n; incai = lda; incaij = 1; } if (lda < leny) BLAS_error(routine_name, -7, lda, NULL); FPU_FIX_START; if (incx > 0) kx = 0; else kx = (1 - lenx) * incx; if (incy > 0) ky = 0; else ky = (1 - leny) * incy; /* No extra-precision needed for alpha = 0 */ if (alpha_i == 0.0) { if (beta_i == 0.0) { iy = ky; for (i = 0; i < leny; i++) { y_i[iy] = 0.0; iy += incy; } } else if (!(beta_i == 0.0)) { iy = ky; for (i = 0; i < leny; i++) { y_elem = y_i[iy]; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = y_elem * split; a1 = con - y_elem; a1 = con - a1; a2 = y_elem - a1; con = beta_i * split; b1 = con - beta_i; b1 = con - b1; b2 = beta_i - b1; head_tmp1 = y_elem * beta_i; tail_tmp1 = (((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2; } y_i[iy] = head_tmp1; iy += incy; } } } else { /* alpha != 0 */ /* * if beta = 0, we can save m multiplies: y = alpha*A*head_x + * alpha*A*tail_x */ if (beta_i == 0.0) { if (alpha_i == 1.0) { /* save m more multiplies if alpha = 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { head_sum = tail_sum = 0.0; head_sum2 = tail_sum2 = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem = head_x_i[jx]; { double dt = (double)x_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = a_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } x_elem = tail_x_i[jx]; { double dt = (double)x_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = a_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum2 + head_prod; bv = s1 - head_sum2; s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum2 + tail_prod; bv = t1 - tail_sum2; t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum2 = t1 + t2; tail_sum2 = t2 - (head_sum2 - t1); } aij += incaij; jx += incx; } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_sum2; bv = s1 - head_sum; s2 = ((head_sum2 - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_sum2; bv = t1 - tail_sum; t2 = ((tail_sum2 - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } y_i[iy] = head_sum; ai += incai; iy += incy; } /* end for */ } else { /* alpha != 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { head_sum = tail_sum = 0.0; head_sum2 = tail_sum2 = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem = head_x_i[jx]; { double dt = (double)x_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = a_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } x_elem = tail_x_i[jx]; { double dt = (double)x_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = a_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum2 + head_prod; bv = s1 - head_sum2; s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum2 + tail_prod; bv = t1 - tail_sum2; t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum2 = t1 + t2; tail_sum2 = t2 - (head_sum2 - t1); } aij += incaij; jx += incx; } { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum * split; a11 = con - head_sum; a11 = con - a11; a21 = head_sum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_sum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum2 * split; a11 = con - head_sum2; a11 = con - a11; a21 = head_sum2 - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_sum2 * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum2 * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp2 = t1 + t2; tail_tmp2 = t2 - (head_tmp2 - t1); } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } y_i[iy] = head_tmp1; ai += incai; iy += incy; } } } else { /* beta != 0 */ if (alpha_i == 1.0) { /* save m multiplies if alpha = 1 */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { head_sum = tail_sum = 0.0;; head_sum2 = tail_sum2 = 0.0;; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem = head_x_i[jx]; { double dt = (double)x_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = a_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } x_elem = tail_x_i[jx]; { double dt = (double)x_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = a_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum2 + head_prod; bv = s1 - head_sum2; s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum2 + tail_prod; bv = t1 - tail_sum2; t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum2 = t1 + t2; tail_sum2 = t2 - (head_sum2 - t1); } aij += incaij; jx += incx; } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_sum2; bv = s1 - head_sum; s2 = ((head_sum2 - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_sum2; bv = t1 - tail_sum; t2 = ((tail_sum2 - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } y_elem = y_i[iy]; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = y_elem * split; a1 = con - y_elem; a1 = con - a1; a2 = y_elem - a1; con = beta_i * split; b1 = con - beta_i; b1 = con - b1; b2 = beta_i - b1; head_tmp1 = y_elem * beta_i; tail_tmp1 = (((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_tmp1; bv = s1 - head_sum; s2 = ((head_tmp1 - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_tmp1; bv = t1 - tail_sum; t2 = ((tail_tmp1 - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp2 = t1 + t2; tail_tmp2 = t2 - (head_tmp2 - t1); } y_i[iy] = head_tmp2; ai += incai; iy += incy; } } else { /* alpha != 1, the most general form: y = * alpha*A*head_x + alpha*A*tail_x + beta*y */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { head_sum = tail_sum = 0.0;; head_sum2 = tail_sum2 = 0.0;; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem = a_i[aij]; x_elem = head_x_i[jx]; { double dt = (double)x_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = a_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } x_elem = tail_x_i[jx]; { double dt = (double)x_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = a_elem * split; a1 = con - a_elem; a1 = con - a1; a2 = a_elem - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_prod = a_elem * dt; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum2 + head_prod; bv = s1 - head_sum2; s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum2 + tail_prod; bv = t1 - tail_sum2; t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum2 = t1 + t2; tail_sum2 = t2 - (head_sum2 - t1); } aij += incaij; jx += incx; } { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum * split; a11 = con - head_sum; a11 = con - a11; a21 = head_sum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_sum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } { /* Compute double-double = double-double * double. */ double a11 , a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum2 * split; a11 = con - head_sum2; a11 = con - a11; a21 = head_sum2 - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_sum2 * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum2 * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp2 = t1 + t2; tail_tmp2 = t2 - (head_tmp2 - t1); } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } y_elem = y_i[iy]; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = y_elem * split; a1 = con - y_elem; a1 = con - a1; a2 = y_elem - a1; con = beta_i * split; b1 = con - beta_i; b1 = con - b1; b2 = beta_i - b1; head_tmp2 = y_elem * beta_i; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } y_i[iy] = head_tmp1; ai += incai; iy += incy; } } } } FPU_FIX_STOP; } break; } }
/* * Purpose * ======= * * Computes y = alpha * ap * x + beta * y, where ap is a symmetric * packed matrix. * * Arguments * ========= * * order (input) blas_order_type * Order of ap; row or column major * * uplo (input) blas_uplo_type * Whether ap is upper or lower * * n (input) int * Dimension of ap and the length of vector x * * alpha (input) const void* * * ap (input) double* * * x (input) void* * * incx (input) int * The stride for vector x. * * beta (input) const void* * * y (input/output) void* * * incy (input) int * The stride for vector y. * */ void BLAS_zspmv_d_z(enum blas_order_type order, enum blas_uplo_type uplo, int n , const void *alpha, const double *ap, const void *x, int incx, const void *beta, void *y, int incy) { static const char routine_name[] = "BLAS_zspmv_d_z"; { int matrix_row, step, ap_index, ap_start, x_index, x_start; int y_start, y_index, incap; double *alpha_i = (double *)alpha; double *beta_i = (double *)beta; const double *ap_i = ap; const double *x_i = (double *)x; double *y_i = (double *)y; double rowsum[2]; double rowtmp[2]; double matval; double vecval[2]; double resval[2]; double tmp1 [2]; double tmp2 [2]; incap = 1; incx *= 2; incy *= 2; if (incx < 0) x_start = (-n + 1) * incx; else x_start = 0; if (incy < 0) y_start = (-n + 1) * incy; else y_start = 0; if (n < 1) { return; } if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) { return; } /* Check for error conditions. */ if (order != blas_colmajor && order != blas_rowmajor) { BLAS_error(routine_name, -1, order, NULL); } if (uplo != blas_upper && uplo != blas_lower) { BLAS_error(routine_name, -2, uplo, NULL); } if (incx == 0) { BLAS_error(routine_name, -7, incx, NULL); } if (incy == 0) { BLAS_error(routine_name, -10, incy, NULL); } if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { { y_index = y_start; for (matrix_row = 0; matrix_row < n; matrix_row++) { resval[0] = y_i[y_index]; resval[1] = y_i[y_index + 1]; { tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1]; tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0]; } y_i[y_index] = tmp2[0]; y_i[y_index + 1] = tmp2[1]; y_index += incy; } } } else { if (uplo == blas_lower) order = (order == blas_rowmajor) ? blas_colmajor : blas_rowmajor; if (order == blas_rowmajor) { if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += incap; x_index += incx; } tmp1[0] = rowsum[0]; tmp1[1] = rowsum[1]; y_i[y_index] = tmp1[0]; y_i[y_index + 1] = tmp1[1]; y_index += incy; ap_start += incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += incap; x_index += incx; } resval[0] = y_i[y_index]; resval[1] = y_i[y_index + 1]; tmp1[0] = rowsum[0]; tmp1[1] = rowsum[1]; { tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1]; tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0]; } tmp2[0] = tmp1[0] + tmp2[0]; tmp2[1] = tmp1[1] + tmp2[1]; y_i[y_index] = tmp2[0]; y_i[y_index + 1] = tmp2[1]; y_index += incy; ap_start += incap; } } } } else { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += incap; x_index += incx; } { tmp1[0] = (double)rowsum[0] * alpha_i[0] - (double)rowsum[1] * alpha_i[1]; tmp1[1] = (double)rowsum[0] * alpha_i[1] + (double)rowsum[1] * alpha_i[0]; } y_i[y_index] = tmp1[0]; y_i[y_index + 1] = tmp1[1]; y_index += incy; ap_start += incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += incap; x_index += incx; } resval[0] = y_i[y_index]; resval[1] = y_i[y_index + 1]; { tmp1[0] = (double)rowsum[0] * alpha_i[0] - (double)rowsum[1] * alpha_i[1]; tmp1[1] = (double)rowsum[0] * alpha_i[1] + (double)rowsum[1] * alpha_i[0]; } { tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1]; tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0]; } tmp2[0] = tmp1[0] + tmp2[0]; tmp2[1] = tmp1[1] + tmp2[1]; y_i[y_index] = tmp2[0]; y_i[y_index + 1] = tmp2[1]; y_index += incy; ap_start += incap; } } } } } else { if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += (step + 1) * incap; x_index += incx; } tmp1[0] = rowsum[0]; tmp1[1] = rowsum[1]; y_i[y_index] = tmp1[0]; y_i[y_index + 1] = tmp1[1]; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += (step + 1) * incap; x_index += incx; } resval[0] = y_i[y_index]; resval[1] = y_i[y_index + 1]; tmp1[0] = rowsum[0]; tmp1[1] = rowsum[1]; { tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1]; tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0]; } tmp2[0] = tmp1[0] + tmp2[0]; tmp2[1] = tmp1[1] + tmp2[1]; y_i[y_index] = tmp2[0]; y_i[y_index + 1] = tmp2[1]; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } } else { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += (step + 1) * incap; x_index += incx; } { tmp1[0] = (double)rowsum[0] * alpha_i[0] - (double)rowsum[1] * alpha_i[1]; tmp1[1] = (double)rowsum[0] * alpha_i[1] + (double)rowsum[1] * alpha_i[0]; } y_i[y_index] = tmp1[0]; y_i[y_index + 1] = tmp1[1]; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum[0] = rowsum[1] = 0.0; rowtmp[0] = rowtmp[1] = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval[0] = x_i[x_index]; vecval[1] = x_i[x_index + 1]; { rowtmp[0] = vecval[0] * matval; rowtmp[1] = vecval[1] * matval; } rowsum[0] = rowsum[0] + rowtmp[0]; rowsum[1] = rowsum[1] + rowtmp[1]; ap_index += (step + 1) * incap; x_index += incx; } resval[0] = y_i[y_index]; resval[1] = y_i[y_index + 1]; { tmp1[0] = (double)rowsum[0] * alpha_i[0] - (double)rowsum[1] * alpha_i[1]; tmp1[1] = (double)rowsum[0] * alpha_i[1] + (double)rowsum[1] * alpha_i[0]; } { tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1]; tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0]; } tmp2[0] = tmp1[0] + tmp2[0]; tmp2[1] = tmp1[1] + tmp2[1]; y_i[y_index] = tmp2[0]; y_i[y_index + 1] = tmp2[1]; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } } } /* if order == ... */ } /* alpha != 0 */ } }
void BLAS_dgemm_s_d(enum blas_order_type order, enum blas_trans_type transa, enum blas_trans_type transb, int m, int n, int k, double alpha , const float *a, int lda, const double *b, int ldb, double beta , double *c, int ldc) /* * Purpose * ======= * * This routine computes the matrix product: * * C <- alpha * op(A) * op(B) + beta * C . * * where op(M) represents either M, M transpose, * or M conjugate transpose. * * Arguments * ========= * * order (input) enum blas_order_type * Storage format of input matrices A, B, and C. * * transa (input) enum blas_trans_type * Operation to be done on matrix A before multiplication. * Can be no operation, transposition, or conjugate transposition. * * transb (input) enum blas_trans_type * Operation to be done on matrix B before multiplication. * Can be no operation, transposition, or conjugate transposition. * * m n k (input) int * The dimensions of matrices A, B, and C. * Matrix C is m-by-n matrix. * Matrix A is m-by-k if A is not transposed, * k-by-m otherwise. * Matrix B is k-by-n if B is not transposed, * n-by-k otherwise. * * alpha (input) double * * a (input) const float* * matrix A. * * lda (input) int * leading dimension of A. * * b (input) const double* * matrix B * * ldb (input) int * leading dimension of B. * * beta (input) double * * c (input/output) double* * matrix C * * ldc (input) int * leading dimension of C. * */ { static const char routine_name[] = "BLAS_dgemm_s_d"; /* Integer Index Variables */ int i , j, h; int ai , bj, ci; int aih , bhj, cij; /* Index into matrices a, b, c during * multiply */ int incai , incaih; /* Index increments for matrix a */ int incbj , incbhj; /* Index increments for matrix b */ int incci , inccij; /* Index increments for matrix c */ /* Input Matrices */ const float *a_i = a; const double *b_i = b; /* Output Matrix */ double *c_i = c; /* Input Scalars */ double alpha_i = alpha; double beta_i = beta; /* Temporary Floating-Point Variables */ float a_elem; double b_elem; double c_elem; double prod; double sum; double tmp1; double tmp2; /* Test for error conditions */ if (m < 0) BLAS_error(routine_name, -4, m, NULL); if (n < 0) BLAS_error(routine_name, -5, n, NULL); if (k < 0) BLAS_error(routine_name, -6, k, NULL); if (order == blas_colmajor) { if (ldc < m) BLAS_error(routine_name, -14, ldc, NULL); if (transa == blas_no_trans) { if (lda < m) BLAS_error(routine_name, -9, lda, NULL); } else { if (lda < k) BLAS_error(routine_name, -9, lda, NULL); } if (transb == blas_no_trans) { if (ldb < k) BLAS_error(routine_name, -11, ldb, NULL); } else { if (ldb < n) BLAS_error(routine_name, -11, ldb, NULL); } } else { /* row major */ if (ldc < n) BLAS_error(routine_name, -14, ldc, NULL); if (transa == blas_no_trans) { if (lda < k) BLAS_error(routine_name, -9, lda, NULL); } else { if (lda < m) BLAS_error(routine_name, -9, lda, NULL); } if (transb == blas_no_trans) { if (ldb < n) BLAS_error(routine_name, -11, ldb, NULL); } else { if (ldb < k) BLAS_error(routine_name, -11, ldb, NULL); } } /* Test for no-op */ if (n == 0 || m == 0 || k == 0) return; if (alpha_i == 0.0 && beta_i == 1.0) { return; } /* Set Index Parameters */ if (order == blas_colmajor) { incci = 1; inccij = ldc; if (transa == blas_no_trans) { incai = 1; incaih = lda; } else { incai = lda; incaih = 1; } if (transb == blas_no_trans) { incbj = ldb; incbhj = 1; } else { incbj = 1; incbhj = ldb; } } else { /* row major */ incci = ldc; inccij = 1; if (transa == blas_no_trans) { incai = lda; incaih = 1; } else { incai = 1; incaih = lda; } if (transb == blas_no_trans) { incbj = 1; incbhj = ldb; } else { incbj = ldb; incbhj = 1; } } /* Ajustment to increments */ /* alpha = 0. In this case, just return beta * C */ if (alpha_i == 0.0) { ci = 0; for (i = 0; i < m; i++, ci += incci) { cij = ci; for (j = 0; j < n; j++, cij += inccij) { c_elem = c_i[cij]; tmp1 = c_elem * beta_i; c_i[cij] = tmp1; } } } else if (alpha_i == 1.0) { /* Case alpha == 1. */ if (beta_i == 0.0) { /* Case alpha == 1, beta == 0. We compute C <--- A * B */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } prod = a_elem * b_elem; sum = sum + prod; } c_i[cij] = sum; } } } else { /* * Case alpha == 1, but beta != 0. We compute C <--- A * B + beta * C */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } prod = a_elem * b_elem; sum = sum + prod; } c_elem = c_i[cij]; tmp2 = c_elem * beta_i; tmp1 = sum; tmp1 = tmp2 + tmp1; c_i[cij] = tmp1; } } } } else { /* The most general form, C <-- alpha * A * B + beta * C */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } prod = a_elem * b_elem; sum = sum + prod; } tmp1 = sum * alpha_i; c_elem = c_i[cij]; tmp2 = c_elem * beta_i; tmp1 = tmp1 + tmp2; c_i[cij] = tmp1; } } } }
/* * Purpose * ======= * * Computes y = alpha * ap * x + beta * y, where ap is a symmetric * packed matrix. * * Arguments * ========= * * order (input) blas_order_type * Order of ap; row or column major * * uplo (input) blas_uplo_type * Whether ap is upper or lower * * n (input) int * Dimension of ap and the length of vector x * * alpha (input) double * * ap (input) float* * * x (input) double* * * incx (input) int * The stride for vector x. * * beta (input) double * * y (input/output) double* * * incy (input) int * The stride for vector y. * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * * */ void BLAS_dspmv_s_d_x(enum blas_order_type order, enum blas_uplo_type uplo, int n , double alpha, const float *ap, const double *x, int incx, double beta, double *y, int incy, enum blas_prec_type prec) { static const char routine_name[] = "BLAS_dspmv_s_d_x"; switch (prec) { case blas_prec_single: case blas_prec_indigenous: case blas_prec_double:{ { int matrix_row, step, ap_index, ap_start, x_index, x_start; int y_start , y_index, incap; double alpha_i = alpha; double beta_i = beta; const float *ap_i = ap; const double *x_i = x; double *y_i = y; double rowsum; double rowtmp; float matval; double vecval; double resval; double tmp1; double tmp2; incap = 1; if (incx < 0) x_start = (-n + 1) * incx; else x_start = 0; if (incy < 0) y_start = (-n + 1) * incy; else y_start = 0; if (n < 1) { return; } if (alpha_i == 0.0 && beta_i == 1.0) { return; } /* Check for error conditions. */ if (order != blas_colmajor && order != blas_rowmajor) { BLAS_error(routine_name, -1, order, NULL); } if (uplo != blas_upper && uplo != blas_lower) { BLAS_error(routine_name, -2, uplo, NULL); } if (incx == 0) { BLAS_error(routine_name, -7, incx, NULL); } if (incy == 0) { BLAS_error(routine_name, -10, incy, NULL); } if (alpha_i == 0.0) { { y_index = y_start; for (matrix_row = 0; matrix_row < n; matrix_row++) { resval = y_i[y_index]; tmp2 = beta_i * resval; y_i[y_index] = tmp2; y_index += incy; } } } else { if (uplo == blas_lower) order = (order == blas_rowmajor) ? blas_colmajor : blas_rowmajor; if (order == blas_rowmajor) { if (alpha_i == 1.0) { if (beta_i == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum = 0.0; rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += incap; x_index += incx; } tmp1 = rowsum; y_i[y_index] = tmp1; y_index += incy; ap_start += incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum = 0.0; rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += incap; x_index += incx; } resval = y_i[y_index]; tmp1 = rowsum; tmp2 = beta_i * resval; tmp2 = tmp1 + tmp2; y_i[y_index] = tmp2; y_index += incy; ap_start += incap; } } } } else { if (beta_i == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum = 0.0; rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += incap; x_index += incx; } tmp1 = rowsum * alpha_i; y_i[y_index] = tmp1; y_index += incy; ap_start += incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum = 0.0; rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += incap; x_index += incx; } resval = y_i[y_index]; tmp1 = rowsum * alpha_i; tmp2 = beta_i * resval; tmp2 = tmp1 + tmp2; y_i[y_index] = tmp2; y_index += incy; ap_start += incap; } } } } } else { if (alpha_i == 1.0) { if (beta_i == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum = 0.0; rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += (step + 1) * incap; x_index += incx; } tmp1 = rowsum; y_i[y_index] = tmp1; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum = 0.0; rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += (step + 1) * incap; x_index += incx; } resval = y_i[y_index]; tmp1 = rowsum; tmp2 = beta_i * resval; tmp2 = tmp1 + tmp2; y_i[y_index] = tmp2; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } } else { if (beta_i == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum = 0.0; rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += (step + 1) * incap; x_index += incx; } tmp1 = rowsum * alpha_i; y_i[y_index] = tmp1; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; rowsum = 0.0; rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; rowtmp = matval * vecval; rowsum = rowsum + rowtmp; ap_index += (step + 1) * incap; x_index += incx; } resval = y_i[y_index]; tmp1 = rowsum * alpha_i; tmp2 = beta_i * resval; tmp2 = tmp1 + tmp2; y_i[y_index] = tmp2; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } } } /* if order == ... */ } /* alpha != 0 */ } break; } case blas_prec_extra:{ { int matrix_row, step, ap_index, ap_start, x_index, x_start; int y_start , y_index, incap; double alpha_i = alpha; double beta_i = beta; const float *ap_i = ap; const double *x_i = x; double *y_i = y; double head_rowsum, tail_rowsum; double head_rowtmp, tail_rowtmp; float matval; double vecval; double resval; double head_tmp1, tail_tmp1; double head_tmp2, tail_tmp2; FPU_FIX_DECL; incap = 1; if (incx < 0) x_start = (-n + 1) * incx; else x_start = 0; if (incy < 0) y_start = (-n + 1) * incy; else y_start = 0; if (n < 1) { return; } if (alpha_i == 0.0 && beta_i == 1.0) { return; } /* Check for error conditions. */ if (order != blas_colmajor && order != blas_rowmajor) { BLAS_error(routine_name, -1, order, NULL); } if (uplo != blas_upper && uplo != blas_lower) { BLAS_error(routine_name, -2, uplo, NULL); } if (incx == 0) { BLAS_error(routine_name, -7, incx, NULL); } if (incy == 0) { BLAS_error(routine_name, -10, incy, NULL); } FPU_FIX_START; if (alpha_i == 0.0) { { y_index = y_start; for (matrix_row = 0; matrix_row < n; matrix_row++) { resval = y_i[y_index]; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = beta_i * split; a1 = con - beta_i; a1 = con - a1; a2 = beta_i - a1; con = resval * split; b1 = con - resval; b1 = con - b1; b2 = resval - b1; head_tmp2 = beta_i * resval; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } y_i[y_index] = head_tmp2; y_index += incy; } } } else { if (uplo == blas_lower) order = (order == blas_rowmajor) ? blas_colmajor : blas_rowmajor; if (order == blas_rowmajor) { if (alpha_i == 1.0) { if (beta_i == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; head_rowsum = tail_rowsum = 0.0; head_rowtmp = tail_rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += incap; x_index += incx; } head_tmp1 = head_rowsum; tail_tmp1 = tail_rowsum; y_i[y_index] = head_tmp1; y_index += incy; ap_start += incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; head_rowsum = tail_rowsum = 0.0; head_rowtmp = tail_rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += incap; x_index += incx; } resval = y_i[y_index]; head_tmp1 = head_rowsum; tail_tmp1 = tail_rowsum; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = beta_i * split; a1 = con - beta_i; a1 = con - a1; a2 = beta_i - a1; con = resval * split; b1 = con - resval; b1 = con - b1; b2 = resval - b1; head_tmp2 = beta_i * resval; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp2 = t1 + t2; tail_tmp2 = t2 - (head_tmp2 - t1); } y_i[y_index] = head_tmp2; y_index += incy; ap_start += incap; } } } } else { if (beta_i == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; head_rowsum = tail_rowsum = 0.0; head_rowtmp = tail_rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += incap; x_index += incx; } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1 , t2; con = head_rowsum * split; a11 = con - head_rowsum; a11 = con - a11; a21 = head_rowsum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_rowsum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_rowsum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } y_i[y_index] = head_tmp1; y_index += incy; ap_start += incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; head_rowsum = tail_rowsum = 0.0; head_rowtmp = tail_rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += (n - step - 1) * incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += incap; x_index += incx; } resval = y_i[y_index]; { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1 , t2; con = head_rowsum * split; a11 = con - head_rowsum; a11 = con - a11; a21 = head_rowsum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_rowsum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_rowsum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = beta_i * split; a1 = con - beta_i; a1 = con - a1; a2 = beta_i - a1; con = resval * split; b1 = con - resval; b1 = con - b1; b2 = resval - b1; head_tmp2 = beta_i * resval; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp2 = t1 + t2; tail_tmp2 = t2 - (head_tmp2 - t1); } y_i[y_index] = head_tmp2; y_index += incy; ap_start += incap; } } } } } else { if (alpha_i == 1.0) { if (beta_i == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; head_rowsum = tail_rowsum = 0.0; head_rowtmp = tail_rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += (step + 1) * incap; x_index += incx; } head_tmp1 = head_rowsum; tail_tmp1 = tail_rowsum; y_i[y_index] = head_tmp1; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; head_rowsum = tail_rowsum = 0.0; head_rowtmp = tail_rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += (step + 1) * incap; x_index += incx; } resval = y_i[y_index]; head_tmp1 = head_rowsum; tail_tmp1 = tail_rowsum; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = beta_i * split; a1 = con - beta_i; a1 = con - a1; a2 = beta_i - a1; con = resval * split; b1 = con - resval; b1 = con - b1; b2 = resval - b1; head_tmp2 = beta_i * resval; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp2 = t1 + t2; tail_tmp2 = t2 - (head_tmp2 - t1); } y_i[y_index] = head_tmp2; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } } else { if (beta_i == 0.0) { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; head_rowsum = tail_rowsum = 0.0; head_rowtmp = tail_rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += (step + 1) * incap; x_index += incx; } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1 , t2; con = head_rowsum * split; a11 = con - head_rowsum; a11 = con - a11; a21 = head_rowsum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_rowsum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_rowsum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } y_i[y_index] = head_tmp1; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } else { { y_index = y_start; ap_start = 0; for (matrix_row = 0; matrix_row < n; matrix_row++) { x_index = x_start; ap_index = ap_start; head_rowsum = tail_rowsum = 0.0; head_rowtmp = tail_rowtmp = 0.0; for (step = 0; step < matrix_row; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += incap; x_index += incx; } for (step = matrix_row; step < n; step++) { matval = ap_i[ap_index]; vecval = x_i[x_index]; { double dt = (double)matval; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = vecval * split; b1 = con - vecval; b1 = con - b1; b2 = vecval - b1; head_rowtmp = dt * vecval; tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_rowsum + head_rowtmp; bv = s1 - head_rowsum; s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv))); /* Add two lo words. */ t1 = tail_rowsum + tail_rowtmp; bv = t1 - tail_rowsum; t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_rowsum = t1 + t2; tail_rowsum = t2 - (head_rowsum - t1); } ap_index += (step + 1) * incap; x_index += incx; } resval = y_i[y_index]; { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1 , t2; con = head_rowsum * split; a11 = con - head_rowsum; a11 = con - a11; a21 = head_rowsum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_rowsum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_rowsum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = beta_i * split; a1 = con - beta_i; a1 = con - a1; a2 = beta_i - a1; con = resval * split; b1 = con - resval; b1 = con - b1; b2 = resval - b1; head_tmp2 = beta_i * resval; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* * Compute double-double = double-double + * double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp2 = t1 + t2; tail_tmp2 = t2 - (head_tmp2 - t1); } y_i[y_index] = head_tmp2; y_index += incy; ap_start += (matrix_row + 1) * incap; } } } } } /* if order == ... */ } /* alpha != 0 */ FPU_FIX_STOP; } break; } } }
void BLAS_dtbsv_s(enum blas_order_type order, enum blas_uplo_type uplo, enum blas_trans_type trans, enum blas_diag_type diag, int n , int k, double alpha, const float *t, int ldt, double *x, int incx) /* * Purpose * ======= * * This routine solves : * * x <- alpha * inverse(t) * x * * Arguments * ========= * * order (input) enum blas_order_type * column major, row major (blas_rowmajor, blas_colmajor) * * uplo (input) enum blas_uplo_type * upper, lower (blas_upper, blas_lower) * * trans (input) enum blas_trans_type * no trans, trans, conj trans * * diag (input) enum blas_diag_type * unit, non unit (blas_unit_diag, blas_non_unit_diag) * * n (input) int * the dimension of t * * k (input) int * the number of subdiagonals/superdiagonals of t * * alpha (input) double * * t (input) float* * Triangular Banded matrix * * x (input) const double* * Array of length n. * * incx (input) int * The stride used to access components x[i]. * */ { /* Routine name */ static const char routine_name[] = "BLAS_dtbsv_s"; int i , j; /* used to keep track of loop counts */ int xi; /* used to index vector x */ int start_xi; /* used as the starting idx to vector x */ int incxi; int Tij; /* index inside of Banded structure */ int dot_start, dot_start_inc1, dot_start_inc2, dot_inc; const float *t_i = t; /* internal matrix t */ double *x_i = x; /* internal x */ double alpha_i = alpha; /* internal alpha */ if (order != blas_rowmajor && order != blas_colmajor) { BLAS_error(routine_name, -1, order, 0); } if (uplo != blas_upper && uplo != blas_lower) { BLAS_error(routine_name, -2, uplo, 0); } if ((trans != blas_trans) && (trans != blas_no_trans) && (trans != blas_conj) && (trans != blas_conj_trans)) { BLAS_error(routine_name, -2, uplo, 0); } if (diag != blas_non_unit_diag && diag != blas_unit_diag) { BLAS_error(routine_name, -4, diag, 0); } if (n < 0) { BLAS_error(routine_name, -5, n, 0); } if (k >= n) { BLAS_error(routine_name, -6, k, 0); } if ((ldt < 1) || (ldt <= k)) { BLAS_error(routine_name, -9, ldt, 0); } if (incx == 0) { BLAS_error(routine_name, -11, incx, 0); } if (n <= 0) return; incxi = incx; /* configuring the vector starting idx */ if (incxi < 0) { start_xi = (1 - n) * incxi; } else { start_xi = 0; } /* if alpha is zero, then return x as a zero vector */ if (alpha_i == 0.0) { xi = start_xi; for (i = 0; i < n; i++) { x_i[xi] = 0.0; xi += incxi; } return; } /* check to see if k=0. if so, we can optimize somewhat */ if (k == 0) { if ((alpha_i == 1.0) && (diag == blas_unit_diag)) { /* nothing to do */ return; } else { /* just run the loops as is. */ } } /* get index variables prepared */ if (((trans == blas_trans) || (trans == blas_conj_trans)) ^ (order == blas_rowmajor)) { dot_start = k; } else { dot_start = 0; } if (((trans == blas_trans) || (trans == blas_conj_trans)) ^ (order == blas_rowmajor)) { dot_inc = 1; dot_start_inc1 = ldt - 1; dot_start_inc2 = ldt; } else { dot_inc = ldt - 1; dot_start_inc1 = 1; dot_start_inc2 = ldt; } if (((trans == blas_trans) || (trans == blas_conj_trans)) ^ (uplo == blas_lower)) { /* start at the first element of x */ /* substitution will proceed forwards (forwardsubstitution) */ } else { /* start at the last element of x */ /* substitution will proceed backwards (backsubstitution) */ dot_inc = -dot_inc; dot_start_inc1 = -dot_start_inc1; dot_start_inc2 = -dot_start_inc2; dot_start = ldt * (n - 1) + k - dot_start; /* order of the following 2 statements matters! */ start_xi = start_xi + (n - 1) * incxi; incxi = -incxi; } { { double temp1; /* temporary variable for calculations */ double temp2; /* temporary variable for calculations */ double x_elem; float T_element; /* loop 1 */ xi = start_xi; for (j = 0; j < k; j++) { /* each time through loop, xi lands on next x to compute. */ x_elem = x_i[xi]; /* * preform the multiplication - in this implementation we do not * seperate the alpha = 1 case */ temp1 = x_elem * alpha_i; xi = start_xi; Tij = dot_start; dot_start += dot_start_inc1; for (i = j; i > 0; i--) { T_element = t_i[Tij]; x_elem = x_i[xi]; temp2 = x_elem * T_element; temp1 = temp1 + (-temp2); xi += incxi; Tij += dot_inc; } /* for across row */ /* * if the diagonal entry is not equal to one, then divide Xj by the * entry */ if (diag == blas_non_unit_diag) { T_element = t_i[Tij]; temp1 = temp1 / T_element; } /* if (diag == blas_non_unit_diag) */ x_i[xi] = temp1; xi += incxi; } /* for j<k */ /* end loop 1 */ /* loop 2 continue without changing j to start */ for (; j < n; j++) { /* each time through loop, xi lands on next x to compute. */ x_elem = x_i[xi]; temp1 = x_elem * alpha_i; xi = start_xi; start_xi += incxi; Tij = dot_start; dot_start += dot_start_inc2; for (i = k; i > 0; i--) { T_element = t_i[Tij]; x_elem = x_i[xi]; temp2 = x_elem * T_element; temp1 = temp1 + (-temp2); xi += incxi; Tij += dot_inc; } /* for across row */ /* * if the diagonal entry is not equal to one, then divide by the * entry */ if (diag == blas_non_unit_diag) { T_element = t_i[Tij]; temp1 = temp1 / T_element; } /* if (diag == blas_non_unit_diag) */ x_i[xi] = temp1; xi += incxi; } /* for j<n */ } } } /* end BLAS_dtbsv_s */
void BLAS_dtrmv_x(enum blas_order_type order, enum blas_uplo_type uplo, enum blas_trans_type trans, enum blas_diag_type diag, int n, double alpha, const double *T, int ldt, double *x, int incx, enum blas_prec_type prec) /* * Purpose * ======= * * Computes x <-- alpha * T * x, where T is a triangular matrix. * * Arguments * ========= * * order (input) enum blas_order_type * column major, row major * * uplo (input) enum blas_uplo_type * upper, lower * * trans (input) enum blas_trans_type * no trans, trans, conj trans * * diag (input) enum blas_diag_type * unit, non unit * * n (input) int * the dimension of T * * alpha (input) double * * T (input) double* * Triangular matrix * * ldt (input) int * Leading dimension of T * * x (input) const double* * Array of length n. * * incx (input) int * The stride used to access components x[i]. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * */ { static const char routine_name[] = "BLAS_dtrmv_x"; switch (prec) { case blas_prec_single: case blas_prec_double: case blas_prec_indigenous:{ int i, j; /* used to idx matrix */ int xj, xj0; int ti, tij, tij0; int inc_ti, inc_tij; int inc_x; const double *T_i = T; /* internal matrix T */ double *x_i = x; /* internal x */ double alpha_i = alpha; /* internal alpha */ double t_elem; double x_elem; double prod; double sum; double tmp; /* all error calls */ if ((order != blas_rowmajor && order != blas_colmajor) || (uplo != blas_upper && uplo != blas_lower) || (trans != blas_trans && trans != blas_no_trans && trans != blas_conj_trans) || (diag != blas_non_unit_diag && diag != blas_unit_diag) || (ldt < n) || (incx == 0)) { BLAS_error(routine_name, 0, 0, NULL); } else if (n <= 0) { BLAS_error(routine_name, -4, n, NULL); } else if (incx == 0) { BLAS_error(routine_name, -9, incx, NULL); } if (trans == blas_no_trans) { if (uplo == blas_upper) { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = ldt; inc_tij = -1; } else { inc_ti = 1; inc_tij = -ldt; } } else { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -ldt; inc_tij = 1; } else { inc_ti = -1; inc_tij = ldt; } } } else { if (uplo == blas_upper) { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -1; inc_tij = ldt; } else { inc_ti = -ldt; inc_tij = 1; } } else { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = 1; inc_tij = -ldt; } else { inc_ti = ldt; inc_tij = -1; } } } xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x); if (alpha_i == 0.0) { xj = xj0; for (j = 0; j < n; j++) { x_i[xj] = 0.0; xj += inc_x; } } else { if (diag == blas_unit_diag) { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { sum = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < (n - 1); j++) { t_elem = T_i[tij]; x_elem = x_i[xj]; prod = x_elem * t_elem; sum = sum + prod; xj += inc_x; tij += inc_tij; } x_elem = x_i[xj]; sum = sum + x_elem; if (alpha_i == 1.0) { x_i[xj] = sum; } else { tmp = sum * alpha_i; x_i[xj] = tmp; } ti += inc_ti; } } else { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { sum = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < n; j++) { t_elem = T_i[tij]; x_elem = x_i[xj]; prod = x_elem * t_elem; sum = sum + prod; xj += inc_x; tij += inc_tij; } if (alpha_i == 1.0) { x_i[xj - inc_x] = sum; } else { tmp = sum * alpha_i; x_i[xj - inc_x] = tmp; } ti += inc_ti; } } } break; } case blas_prec_extra:{ int i, j; /* used to idx matrix */ int xj, xj0; int ti, tij, tij0; int inc_ti, inc_tij; int inc_x; const double *T_i = T; /* internal matrix T */ double *x_i = x; /* internal x */ double alpha_i = alpha; /* internal alpha */ double t_elem; double x_elem; double head_prod, tail_prod; double head_sum, tail_sum; double head_tmp, tail_tmp; FPU_FIX_DECL; FPU_FIX_START; /* all error calls */ if ((order != blas_rowmajor && order != blas_colmajor) || (uplo != blas_upper && uplo != blas_lower) || (trans != blas_trans && trans != blas_no_trans && trans != blas_conj_trans) || (diag != blas_non_unit_diag && diag != blas_unit_diag) || (ldt < n) || (incx == 0)) { BLAS_error(routine_name, 0, 0, NULL); } else if (n <= 0) { BLAS_error(routine_name, -4, n, NULL); } else if (incx == 0) { BLAS_error(routine_name, -9, incx, NULL); } if (trans == blas_no_trans) { if (uplo == blas_upper) { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = ldt; inc_tij = -1; } else { inc_ti = 1; inc_tij = -ldt; } } else { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -ldt; inc_tij = 1; } else { inc_ti = -1; inc_tij = ldt; } } } else { if (uplo == blas_upper) { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -1; inc_tij = ldt; } else { inc_ti = -ldt; inc_tij = 1; } } else { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = 1; inc_tij = -ldt; } else { inc_ti = ldt; inc_tij = -1; } } } xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x); if (alpha_i == 0.0) { xj = xj0; for (j = 0; j < n; j++) { x_i[xj] = 0.0; xj += inc_x; } } else { if (diag == blas_unit_diag) { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { head_sum = tail_sum = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < (n - 1); j++) { t_elem = T_i[tij]; x_elem = x_i[xj]; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = x_elem * split; a1 = con - x_elem; a1 = con - a1; a2 = x_elem - a1; con = t_elem * split; b1 = con - t_elem; b1 = con - b1; b2 = t_elem - b1; head_prod = x_elem * t_elem; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } xj += inc_x; tij += inc_tij; } x_elem = x_i[xj]; { /* Compute double-double = double-double + double. */ double e, t1, t2; /* Knuth trick. */ t1 = head_sum + x_elem; e = t1 - head_sum; t2 = ((x_elem - e) + (head_sum - (t1 - e))) + tail_sum; /* The result is t1 + t2, after normalization. */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } if (alpha_i == 1.0) { x_i[xj] = head_sum; } else { { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum * split; a11 = con - head_sum; a11 = con - a11; a21 = head_sum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_sum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp = t1 + t2; tail_tmp = t2 - (head_tmp - t1); } x_i[xj] = head_tmp; } ti += inc_ti; } } else { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { head_sum = tail_sum = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < n; j++) { t_elem = T_i[tij]; x_elem = x_i[xj]; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = x_elem * split; a1 = con - x_elem; a1 = con - a1; a2 = x_elem - a1; con = t_elem * split; b1 = con - t_elem; b1 = con - b1; b2 = t_elem - b1; head_prod = x_elem * t_elem; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } xj += inc_x; tij += inc_tij; } if (alpha_i == 1.0) { x_i[xj - inc_x] = head_sum; } else { { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum * split; a11 = con - head_sum; a11 = con - a11; a21 = head_sum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_sum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp = t1 + t2; tail_tmp = t2 - (head_tmp - t1); } x_i[xj - inc_x] = head_tmp; } ti += inc_ti; } } } FPU_FIX_STOP; break; } } }
void BLAS_dwaxpby_s_s_x(int n, double alpha, const float *x, int incx, double beta , const float *y, int incy, double *w, int incw , enum blas_prec_type prec) /* * Purpose * ======= * * This routine computes: * * w <- alpha * x + beta * y * * Arguments * ========= * * n (input) int * The length of vectors x, y, and w. * * alpha (input) double * * x (input) const float* * Array of length n. * * incx (input) int * The stride used to access components x[i]. * * beta (input) double * * y (input) float* * Array of length n. * * incy (input) int * The stride used to access components y[i]. * * w (output) double* * Array of length n. * * incw (input) int * The stride used to write components w[i]. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * */ { char *routine_name = "BLAS_dwaxpby_s_s_x"; switch (prec) { case blas_prec_single: case blas_prec_double: case blas_prec_indigenous:{ int i , ix = 0, iy = 0, iw = 0; double *w_i = w; const float *x_i = x; const float *y_i = y; double alpha_i = alpha; double beta_i = beta; float x_ii; float y_ii; double tmpx; double tmpy; /* Test the input parameters. */ if (incx == 0) BLAS_error(routine_name, -4, incx, NULL); else if (incy == 0) BLAS_error(routine_name, -7, incy, NULL); else if (incw == 0) BLAS_error(routine_name, -9, incw, NULL); /* Immediate return */ if (n <= 0) { return; } if (incx < 0) ix = (-n + 1) * incx; if (incy < 0) iy = (-n + 1) * incy; if (incw < 0) iw = (-n + 1) * incw; for (i = 0; i < n; ++i) { x_ii = x_i[ix]; y_ii = y_i[iy]; tmpx = alpha_i * x_ii; /* tmpx = alpha * x[ix] */ tmpy = beta_i * y_ii; /* tmpy = beta * y[iy] */ tmpy = tmpy + tmpx; w_i[iw] = tmpy; ix += incx; iy += incy; iw += incw; } /* endfor */ break; } case blas_prec_extra:{ int i , ix = 0, iy = 0, iw = 0; double *w_i = w; const float *x_i = x; const float *y_i = y; double alpha_i = alpha; double beta_i = beta; float x_ii; float y_ii; double head_tmpx, tail_tmpx; double head_tmpy, tail_tmpy; FPU_FIX_DECL; /* Test the input parameters. */ if (incx == 0) BLAS_error(routine_name, -4, incx, NULL); else if (incy == 0) BLAS_error(routine_name, -7, incy, NULL); else if (incw == 0) BLAS_error(routine_name, -9, incw, NULL); /* Immediate return */ if (n <= 0) { return; } FPU_FIX_START; if (incx < 0) ix = (-n + 1) * incx; if (incy < 0) iy = (-n + 1) * incy; if (incw < 0) iw = (-n + 1) * incw; for (i = 0; i < n; ++i) { x_ii = x_i[ix]; y_ii = y_i[iy]; { double dt = (double)x_ii; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = alpha_i * split; a1 = con - alpha_i; a1 = con - a1; a2 = alpha_i - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_tmpx = alpha_i * dt; tail_tmpx = (((a1 * b1 - head_tmpx) + a1 * b2) + a2 * b1) + a2 * b2; } } /* tmpx = alpha * x[ix] */ { double dt = (double)y_ii; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = beta_i * split; a1 = con - beta_i; a1 = con - a1; a2 = beta_i - a1; con = dt * split; b1 = con - dt; b1 = con - b1; b2 = dt - b1; head_tmpy = beta_i * dt; tail_tmpy = (((a1 * b1 - head_tmpy) + a1 * b2) + a2 * b1) + a2 * b2; } } /* tmpy = beta * y[iy] */ { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_tmpy + head_tmpx; bv = s1 - head_tmpy; s2 = ((head_tmpx - bv) + (head_tmpy - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmpy + tail_tmpx; bv = t1 - tail_tmpy; t2 = ((tail_tmpx - bv) + (tail_tmpy - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmpy = t1 + t2; tail_tmpy = t2 - (head_tmpy - t1); } w_i[iw] = head_tmpy; ix += incx; iy += incy; iw += incw; } /* endfor */ FPU_FIX_STOP; break; } } }
void BLAS_dge_sum_mv_s_d(enum blas_order_type order, int m, int n, double alpha , const float *a, int lda, const double *x, int incx, double beta , const float *b, int ldb, double *y, int incy) /* * Purpose * ======= * * Computes y = alpha * A * x + beta * B * y, * where A, B are general matricies. * * Arguments * ========= * * order (input) blas_order_type * Order of A; row or column major * * m (input) int * Row Dimension of A, B, length of output vector y * * n (input) int * Column Dimension of A, B and the length of vector x * * alpha (input) double * * A (input) const float* * * lda (input) int * Leading dimension of A * * x (input) const double* * * incx (input) int * The stride for vector x. * * beta (input) double * * b (input) const float* * * ldb (input) int * Leading dimension of B * * y (input/output) double* * * incy (input) int * The stride for vector y. * */ { /* Routine name */ static const char routine_name[] = "BLAS_dge_sum_mv_s_d"; int i , j; int xi , yi; int x_starti, y_starti, incxi, incyi; int lda_min; int ai; int incai; int aij; int incaij; int bi; int incbi; int bij; int incbij; const float *a_i = a; const float *b_i = b; const double *x_i = x; double *y_i = y; double alpha_i = alpha; double beta_i = beta; float a_elem; float b_elem; double x_elem; double prod; double sumA; double sumB; double tmp1; double tmp2; /* m is number of rows */ /* n is number of columns */ if (m == 0 || n == 0) return; /* all error calls */ if (order == blas_rowmajor) { lda_min = n; incai = lda; /* row stride */ incbi = ldb; incbij = incaij = 1; /* column stride */ } else if (order == blas_colmajor) { lda_min = m; incai = incbi = 1; /* row stride */ incaij = lda; /* column stride */ incbij = ldb; } else { /* error, order not blas_colmajor not blas_rowmajor */ BLAS_error(routine_name, -1, order, 0); return; } if (m < 0) BLAS_error(routine_name, -2, m, 0); else if (n < 0) BLAS_error(routine_name, -3, n, 0); if (lda < lda_min) BLAS_error(routine_name, -6, lda, 0); else if (ldb < lda_min) BLAS_error(routine_name, -11, ldb, 0); else if (incx == 0) BLAS_error(routine_name, -8, incx, 0); else if (incy == 0) BLAS_error(routine_name, -13, incy, 0); incxi = incx; incyi = incy; if (incxi > 0) x_starti = 0; else x_starti = (1 - n) * incxi; if (incyi > 0) y_starti = 0; else y_starti = (1 - m) * incyi; if (alpha_i == 0.0) { if (beta_i == 0.0) { /* alpha, beta are 0.0 */ for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { y_i[yi] = 0.0; } } else if (beta_i == 1.0) { /* alpha is 0.0, beta is 1.0 */ bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumB = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem = x_i[xi]; b_elem = b_i[bij]; prod = b_elem * x_elem; sumB = sumB + prod; bij += incbij; } /* now put the result into y_i */ y_i[yi] = sumB; bi += incbi; } } else { /* alpha is 0.0, beta not 1.0 nor 0.0 */ bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumB = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem = x_i[xi]; b_elem = b_i[bij]; prod = b_elem * x_elem; sumB = sumB + prod; bij += incbij; } /* now put the result into y_i */ tmp1 = sumB * beta_i; y_i[yi] = tmp1; bi += incbi; } } } else if (alpha_i == 1.0) { if (beta_i == 0.0) { /* alpha is 1.0, beta is 0.0 */ ai = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA = 0.0; aij = ai; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem = x_i[xi]; a_elem = a_i[aij]; prod = a_elem * x_elem; sumA = sumA + prod; aij += incaij; } /* now put the result into y_i */ y_i[yi] = sumA; ai += incai; } } else if (beta_i == 1.0) { /* alpha is 1.0, beta is 1.0 */ ai = 0; bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA = 0.0; aij = ai; sumB = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem = x_i[xi]; a_elem = a_i[aij]; prod = a_elem * x_elem; sumA = sumA + prod; aij += incaij; b_elem = b_i[bij]; prod = b_elem * x_elem; sumB = sumB + prod; bij += incbij; } /* now put the result into y_i */ tmp1 = sumA; tmp2 = sumB; tmp1 = tmp1 + tmp2; y_i[yi] = tmp1; ai += incai; bi += incbi; } } else { /* alpha is 1.0, beta is other */ ai = 0; bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA = 0.0; aij = ai; sumB = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem = x_i[xi]; a_elem = a_i[aij]; prod = a_elem * x_elem; sumA = sumA + prod; aij += incaij; b_elem = b_i[bij]; prod = b_elem * x_elem; sumB = sumB + prod; bij += incbij; } /* now put the result into y_i */ tmp1 = sumA; tmp2 = sumB * beta_i; tmp1 = tmp1 + tmp2; y_i[yi] = tmp1; ai += incai; bi += incbi; } } } else { if (beta_i == 0.0) { /* alpha is other, beta is 0.0 */ ai = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA = 0.0; aij = ai; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem = x_i[xi]; a_elem = a_i[aij]; prod = a_elem * x_elem; sumA = sumA + prod; aij += incaij; } /* now put the result into y_i */ tmp1 = sumA * alpha_i; y_i[yi] = tmp1; ai += incai; } } else if (beta_i == 1.0) { /* alpha is other, beta is 1.0 */ ai = 0; bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA = 0.0; aij = ai; sumB = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem = x_i[xi]; a_elem = a_i[aij]; prod = a_elem * x_elem; sumA = sumA + prod; aij += incaij; b_elem = b_i[bij]; prod = b_elem * x_elem; sumB = sumB + prod; bij += incbij; } /* now put the result into y_i */ tmp1 = sumA * alpha_i; tmp2 = sumB; tmp1 = tmp1 + tmp2; y_i[yi] = tmp1; ai += incai; bi += incbi; } } else { /* most general form, alpha, beta are other */ ai = 0; bi = 0; for (i = 0, yi = y_starti; i < m; i++, yi += incyi) { sumA = 0.0; aij = ai; sumB = 0.0; bij = bi; for (j = 0, xi = x_starti; j < n; j++, xi += incxi) { x_elem = x_i[xi]; a_elem = a_i[aij]; prod = a_elem * x_elem; sumA = sumA + prod; aij += incaij; b_elem = b_i[bij]; prod = b_elem * x_elem; sumB = sumB + prod; bij += incbij; } /* now put the result into y_i */ tmp1 = sumA * alpha_i; tmp2 = sumB * beta_i; tmp1 = tmp1 + tmp2; y_i[yi] = tmp1; ai += incai; bi += incbi; } } } } /* end BLAS_dge_sum_mv_s_d */
void BLAS_zhemv_z_c(enum blas_order_type order, enum blas_uplo_type uplo, int n , const void *alpha, const void *a, int lda, const void *x, int incx, const void *beta, void *y, int incy) /* * Purpose * ======= * * This routines computes the matrix product: * * y <- alpha * A * x + beta * y * * where A is a Hermitian matrix. * * Arguments * ========= * * order (input) enum blas_order_type * Storage format of input hermitian matrix A. * * uplo (input) enum blas_uplo_type * Determines which half of matrix A (upper or lower triangle) * is accessed. * * n (input) int * Dimension of A and size of vectors x, y. * * alpha (input) const void* * * a (input) void* * Matrix A. * * lda (input) int * Leading dimension of matrix A. * * x (input) void* * Vector x. * * incx (input) int * Stride for vector x. * * beta (input) const void* * * y (input/output) void* * Vector y. * * incy (input) int * Stride for vector y. * */ { /* Routine name */ static const char routine_name[] = "BLAS_zhemv_z_c"; /* Integer Index Variables */ int i , k; int xi , yi; int aik , astarti, x_starti, y_starti; int incai; int incaik , incaik2; int n_i; /* Input Matrices */ const double *a_i = (double *)a; const float *x_i = (float *)x; /* Output Vector */ double *y_i = (double *)y; /* Input Scalars */ double *alpha_i = (double *)alpha; double *beta_i = (double *)beta; /* Temporary Floating-Point Variables */ double a_elem [2]; float x_elem [2]; double y_elem [2]; double prod [2]; double sum [2]; double tmp1 [2]; double tmp2 [2]; /* Test for no-op */ if (n <= 0) { return; } if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) { return; } /* Check for error conditions. */ if (order != blas_colmajor && order != blas_rowmajor) { BLAS_error(routine_name, -1, order, NULL); } if (uplo != blas_upper && uplo != blas_lower) { BLAS_error(routine_name, -2, uplo, NULL); } if (lda < n) { BLAS_error(routine_name, -3, n, NULL); } if (incx == 0) { BLAS_error(routine_name, -8, incx, NULL); } if (incy == 0) { BLAS_error(routine_name, -11, incy, NULL); } /* Set Index Parameters */ n_i = n; if ((order == blas_colmajor && uplo == blas_upper) || (order == blas_rowmajor && uplo == blas_lower)) { incai = lda; incaik = 1; incaik2 = lda; } else { incai = 1; incaik = lda; incaik2 = 1; } /* Adjustment to increments (if any) */ incx *= 2; incy *= 2; incai *= 2; incaik *= 2; incaik2 *= 2; if (incx < 0) { x_starti = (-n_i + 1) * incx; } else { x_starti = 0; } if (incy < 0) { y_starti = (-n_i + 1) * incy; } else { y_starti = 0; } /* alpha = 0. In this case, just return beta * y */ if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) { y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp1[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1]; tmp1[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0]; } y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } else { /* determine whether we conjugate in first loop or second loop */ if (uplo == blas_lower) { /* conjugate second */ /* Case alpha == 1. */ if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* Case alpha = 1, beta = 0. We compute y <--- A * x */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incaik2) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; k++; aik += incaik2; xi += incx; for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_i[yi] = sum[0]; y_i[yi + 1] = sum[1]; } } else { /* * Case alpha = 1, but beta != 0. We compute y <--- A * x + beta * * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incaik2) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; k++; aik += incaik2; xi += incx; for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1]; tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0]; } tmp1[0] = sum[0]; tmp1[1] = sum[1]; tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } } else { /* The most general form, y <--- alpha * A * x + beta * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incaik2) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; k++; aik += incaik2; xi += incx; for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1]; tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0]; } { tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1]; tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0]; } tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } } else { /* conjugate first loop */ /* Case alpha == 1. */ if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* Case alpha = 1, beta = 0. We compute y <--- A * x */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incaik2) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; k++; aik += incaik2; xi += incx; for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_i[yi] = sum[0]; y_i[yi + 1] = sum[1]; } } else { /* * Case alpha = 1, but beta != 0. We compute y <--- A * x + beta * * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incaik2) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; k++; aik += incaik2; xi += incx; for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1]; tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0]; } tmp1[0] = sum[0]; tmp1[1] = sum[1]; tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } } else { /* The most general form, y <--- alpha * A * x + beta * y */ for (i = 0, yi = y_starti, astarti = 0; i < n_i; i++, yi += incy, astarti += incaik2) { sum[0] = sum[1] = 0.0; for (k = 0, aik = astarti, xi = x_starti; k < i; k++, aik += incaik, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } a_elem[0] = a_i[aik]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = x_elem[0] * a_elem[0]; prod[1] = x_elem[1] * a_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; k++; aik += incaik2; xi += incx; for (; k < n_i; k++, aik += incaik2, xi += incx) { a_elem[0] = a_i[aik]; a_elem[1] = a_i[aik + 1]; x_elem[0] = x_i[xi]; x_elem[1] = x_i[xi + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; } y_elem[0] = y_i[yi]; y_elem[1] = y_i[yi + 1]; { tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1]; tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0]; } { tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1]; tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0]; } tmp1[0] = tmp2[0] + tmp1[0]; tmp1[1] = tmp2[1] + tmp1[1]; y_i[yi] = tmp1[0]; y_i[yi + 1] = tmp1[1]; } } } } } /* end BLAS_zhemv_z_c */
void BLAS_dgemm_s_d_x(enum blas_order_type order, enum blas_trans_type transa, enum blas_trans_type transb, int m, int n, int k, double alpha , const float *a, int lda, const double *b, int ldb, double beta , double *c, int ldc, enum blas_prec_type prec) /* * Purpose * ======= * * This routine computes the matrix product: * * C <- alpha * op(A) * op(B) + beta * C . * * where op(M) represents either M, M transpose, * or M conjugate transpose. * * Arguments * ========= * * order (input) enum blas_order_type * Storage format of input matrices A, B, and C. * * transa (input) enum blas_trans_type * Operation to be done on matrix A before multiplication. * Can be no operation, transposition, or conjugate transposition. * * transb (input) enum blas_trans_type * Operation to be done on matrix B before multiplication. * Can be no operation, transposition, or conjugate transposition. * * m n k (input) int * The dimensions of matrices A, B, and C. * Matrix C is m-by-n matrix. * Matrix A is m-by-k if A is not transposed, * k-by-m otherwise. * Matrix B is k-by-n if B is not transposed, * n-by-k otherwise. * * alpha (input) double * * a (input) const float* * matrix A. * * lda (input) int * leading dimension of A. * * b (input) const double* * matrix B * * ldb (input) int * leading dimension of B. * * beta (input) double * * c (input/output) double* * matrix C * * ldc (input) int * leading dimension of C. * * prec (input) enum blas_prec_type * Specifies the internal precision to be used. * = blas_prec_single: single precision. * = blas_prec_double: double precision. * = blas_prec_extra : anything at least 1.5 times as accurate * than double, and wider than 80-bits. * We use double-double in our implementation. * */ { static const char routine_name[] = "BLAS_dgemm_s_d_x"; switch (prec) { case blas_prec_single: case blas_prec_double: case blas_prec_indigenous:{ /* Integer Index Variables */ int i , j, h; int ai , bj, ci; int aih , bhj, cij; /* Index into matrices a, b, c during * multiply */ int incai, incaih; /* Index increments for matrix a */ int incbj, incbhj; /* Index increments for matrix b */ int incci, inccij; /* Index increments for matrix c */ /* Input Matrices */ const float *a_i = a; const double *b_i = b; /* Output Matrix */ double *c_i = c; /* Input Scalars */ double alpha_i = alpha; double beta_i = beta; /* Temporary Floating-Point Variables */ float a_elem; double b_elem; double c_elem; double prod; double sum; double tmp1; double tmp2; /* Test for error conditions */ if (m < 0) BLAS_error(routine_name, -4, m, NULL); if (n < 0) BLAS_error(routine_name, -5, n, NULL); if (k < 0) BLAS_error(routine_name, -6, k, NULL); if (order == blas_colmajor) { if (ldc < m) BLAS_error(routine_name, -14, ldc, NULL); if (transa == blas_no_trans) { if (lda < m) BLAS_error(routine_name, -9, lda, NULL); } else { if (lda < k) BLAS_error(routine_name, -9, lda, NULL); } if (transb == blas_no_trans) { if (ldb < k) BLAS_error(routine_name, -11, ldb, NULL); } else { if (ldb < n) BLAS_error(routine_name, -11, ldb, NULL); } } else { /* row major */ if (ldc < n) BLAS_error(routine_name, -14, ldc, NULL); if (transa == blas_no_trans) { if (lda < k) BLAS_error(routine_name, -9, lda, NULL); } else { if (lda < m) BLAS_error(routine_name, -9, lda, NULL); } if (transb == blas_no_trans) { if (ldb < n) BLAS_error(routine_name, -11, ldb, NULL); } else { if (ldb < k) BLAS_error(routine_name, -11, ldb, NULL); } } /* Test for no-op */ if (n == 0 || m == 0 || k == 0) return; if (alpha_i == 0.0 && beta_i == 1.0) { return; } /* Set Index Parameters */ if (order == blas_colmajor) { incci = 1; inccij = ldc; if (transa == blas_no_trans) { incai = 1; incaih = lda; } else { incai = lda; incaih = 1; } if (transb == blas_no_trans) { incbj = ldb; incbhj = 1; } else { incbj = 1; incbhj = ldb; } } else { /* row major */ incci = ldc; inccij = 1; if (transa == blas_no_trans) { incai = lda; incaih = 1; } else { incai = 1; incaih = lda; } if (transb == blas_no_trans) { incbj = 1; incbhj = ldb; } else { incbj = ldb; incbhj = 1; } } /* Ajustment to increments */ /* alpha = 0. In this case, just return beta * C */ if (alpha_i == 0.0) { ci = 0; for (i = 0; i < m; i++, ci += incci) { cij = ci; for (j = 0; j < n; j++, cij += inccij) { c_elem = c_i[cij]; tmp1 = c_elem * beta_i; c_i[cij] = tmp1; } } } else if (alpha_i == 1.0) { /* Case alpha == 1. */ if (beta_i == 0.0) { /* Case alpha == 1, beta == 0. We compute C <--- A * B */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } prod = a_elem * b_elem; sum = sum + prod; } c_i[cij] = sum; } } } else { /* * Case alpha == 1, but beta != 0. We compute C <--- A * B + beta * * C */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } prod = a_elem * b_elem; sum = sum + prod; } c_elem = c_i[cij]; tmp2 = c_elem * beta_i; tmp1 = sum; tmp1 = tmp2 + tmp1; c_i[cij] = tmp1; } } } } else { /* The most general form, C <-- alpha * A * B + beta * C */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } prod = a_elem * b_elem; sum = sum + prod; } tmp1 = sum * alpha_i; c_elem = c_i[cij]; tmp2 = c_elem * beta_i; tmp1 = tmp1 + tmp2; c_i[cij] = tmp1; } } } break; } case blas_prec_extra:{ /* Integer Index Variables */ int i , j, h; int ai , bj, ci; int aih , bhj, cij; /* Index into matrices a, b, c during * multiply */ int incai, incaih; /* Index increments for matrix a */ int incbj, incbhj; /* Index increments for matrix b */ int incci, inccij; /* Index increments for matrix c */ /* Input Matrices */ const float *a_i = a; const double *b_i = b; /* Output Matrix */ double *c_i = c; /* Input Scalars */ double alpha_i = alpha; double beta_i = beta; /* Temporary Floating-Point Variables */ float a_elem; double b_elem; double c_elem; double head_prod, tail_prod; double head_sum, tail_sum; double head_tmp1, tail_tmp1; double head_tmp2, tail_tmp2; FPU_FIX_DECL; /* Test for error conditions */ if (m < 0) BLAS_error(routine_name, -4, m, NULL); if (n < 0) BLAS_error(routine_name, -5, n, NULL); if (k < 0) BLAS_error(routine_name, -6, k, NULL); if (order == blas_colmajor) { if (ldc < m) BLAS_error(routine_name, -14, ldc, NULL); if (transa == blas_no_trans) { if (lda < m) BLAS_error(routine_name, -9, lda, NULL); } else { if (lda < k) BLAS_error(routine_name, -9, lda, NULL); } if (transb == blas_no_trans) { if (ldb < k) BLAS_error(routine_name, -11, ldb, NULL); } else { if (ldb < n) BLAS_error(routine_name, -11, ldb, NULL); } } else { /* row major */ if (ldc < n) BLAS_error(routine_name, -14, ldc, NULL); if (transa == blas_no_trans) { if (lda < k) BLAS_error(routine_name, -9, lda, NULL); } else { if (lda < m) BLAS_error(routine_name, -9, lda, NULL); } if (transb == blas_no_trans) { if (ldb < n) BLAS_error(routine_name, -11, ldb, NULL); } else { if (ldb < k) BLAS_error(routine_name, -11, ldb, NULL); } } /* Test for no-op */ if (n == 0 || m == 0 || k == 0) return; if (alpha_i == 0.0 && beta_i == 1.0) { return; } /* Set Index Parameters */ if (order == blas_colmajor) { incci = 1; inccij = ldc; if (transa == blas_no_trans) { incai = 1; incaih = lda; } else { incai = lda; incaih = 1; } if (transb == blas_no_trans) { incbj = ldb; incbhj = 1; } else { incbj = 1; incbhj = ldb; } } else { /* row major */ incci = ldc; inccij = 1; if (transa == blas_no_trans) { incai = lda; incaih = 1; } else { incai = 1; incaih = lda; } if (transb == blas_no_trans) { incbj = 1; incbhj = ldb; } else { incbj = ldb; incbhj = 1; } } FPU_FIX_START; /* Ajustment to increments */ /* alpha = 0. In this case, just return beta * C */ if (alpha_i == 0.0) { ci = 0; for (i = 0; i < m; i++, ci += incci) { cij = ci; for (j = 0; j < n; j++, cij += inccij) { c_elem = c_i[cij]; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = c_elem * split; a1 = con - c_elem; a1 = con - a1; a2 = c_elem - a1; con = beta_i * split; b1 = con - beta_i; b1 = con - b1; b2 = beta_i - b1; head_tmp1 = c_elem * beta_i; tail_tmp1 = (((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2; } c_i[cij] = head_tmp1; } } } else if (alpha_i == 1.0) { /* Case alpha == 1. */ if (beta_i == 0.0) { /* Case alpha == 1, beta == 0. We compute C <--- A * B */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; head_sum = tail_sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } { double dt = (double)a_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = b_elem * split; b1 = con - b_elem; b1 = con - b1; b2 = b_elem - b1; head_prod = dt * b_elem; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } } c_i[cij] = head_sum; } } } else { /* * Case alpha == 1, but beta != 0. We compute C <--- A * B + beta * * C */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; head_sum = tail_sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } { double dt = (double)a_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = b_elem * split; b1 = con - b_elem; b1 = con - b1; b2 = b_elem - b1; head_prod = dt * b_elem; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } } c_elem = c_i[cij]; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = c_elem * split; a1 = con - c_elem; a1 = con - a1; a2 = c_elem - a1; con = beta_i * split; b1 = con - beta_i; b1 = con - b1; b2 = beta_i - b1; head_tmp2 = c_elem * beta_i; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } head_tmp1 = head_sum; tail_tmp1 = tail_sum; { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_tmp2 + head_tmp1; bv = s1 - head_tmp2; s2 = ((head_tmp1 - bv) + (head_tmp2 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp2 + tail_tmp1; bv = t1 - tail_tmp2; t2 = ((tail_tmp1 - bv) + (tail_tmp2 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } c_i[cij] = head_tmp1; } } } } else { /* The most general form, C <-- alpha * A * B + beta * C */ ci = 0; ai = 0; for (i = 0; i < m; i++, ci += incci, ai += incai) { cij = ci; bj = 0; for (j = 0; j < n; j++, cij += inccij, bj += incbj) { aih = ai; bhj = bj; head_sum = tail_sum = 0.0; for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) { a_elem = a_i[aih]; b_elem = b_i[bhj]; if (transa == blas_conj_trans) { } if (transb == blas_conj_trans) { } { double dt = (double)a_elem; { /* Compute double_double = double * double. */ double a1 , a2, b1, b2, con; con = dt * split; a1 = con - dt; a1 = con - a1; a2 = dt - a1; con = b_elem * split; b1 = con - b_elem; b1 = con - b1; b2 = b_elem - b1; head_prod = dt * b_elem; tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2; } } { /* Compute double-double = double-double + double-double. */ double bv; double s1 , s2, t1, t2; /* Add two hi words. */ s1 = head_sum + head_prod; bv = s1 - head_sum; s2 = ((head_prod - bv) + (head_sum - (s1 - bv))); /* Add two lo words. */ t1 = tail_sum + tail_prod; bv = t1 - tail_sum; t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_sum = t1 + t2; tail_sum = t2 - (head_sum - t1); } } { /* Compute double-double = double-double * double. */ double a11, a21, b1, b2, c11, c21, c2, con, t1, t2; con = head_sum * split; a11 = con - head_sum; a11 = con - a11; a21 = head_sum - a11; con = alpha_i * split; b1 = con - alpha_i; b1 = con - b1; b2 = alpha_i - b1; c11 = head_sum * alpha_i; c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2; c2 = tail_sum * alpha_i; t1 = c11 + c2; t2 = (c2 - (t1 - c11)) + c21; head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } c_elem = c_i[cij]; { /* Compute double_double = double * double. */ double a1, a2, b1, b2, con; con = c_elem * split; a1 = con - c_elem; a1 = con - a1; a2 = c_elem - a1; con = beta_i * split; b1 = con - beta_i; b1 = con - b1; b2 = beta_i - b1; head_tmp2 = c_elem * beta_i; tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2; } { /* Compute double-double = double-double + double-double. */ double bv; double s1, s2, t1, t2; /* Add two hi words. */ s1 = head_tmp1 + head_tmp2; bv = s1 - head_tmp1; s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv))); /* Add two lo words. */ t1 = tail_tmp1 + tail_tmp2; bv = t1 - tail_tmp1; t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv))); s2 += t1; /* Renormalize (s1, s2) to (t1, s2) */ t1 = s1 + s2; s2 = s2 - (t1 - s1); t2 += s2; /* Renormalize (t1, t2) */ head_tmp1 = t1 + t2; tail_tmp1 = t2 - (head_tmp1 - t1); } c_i[cij] = head_tmp1; } } } FPU_FIX_STOP; break; } } }
void BLAS_ctrmv_s(enum blas_order_type order, enum blas_uplo_type uplo, enum blas_trans_type trans, enum blas_diag_type diag, int n, const void *alpha, const float *T, int ldt, void *x, int incx) /* * Purpose * ======= * * Computes x <-- alpha * T * x, where T is a triangular matrix. * * Arguments * ========= * * order (input) enum blas_order_type * column major, row major * * uplo (input) enum blas_uplo_type * upper, lower * * trans (input) enum blas_trans_type * no trans, trans, conj trans * * diag (input) enum blas_diag_type * unit, non unit * * n (input) int * the dimension of T * * alpha (input) const void* * * T (input) float* * Triangular matrix * * ldt (input) int * Leading dimension of T * * x (input) const void* * Array of length n. * * incx (input) int * The stride used to access components x[i]. * */ { static const char routine_name[] = "BLAS_ctrmv_s"; int i, j; /* used to idx matrix */ int xj, xj0; int ti, tij, tij0; int inc_ti, inc_tij; int inc_x; const float *T_i = T; /* internal matrix T */ float *x_i = (float *) x; /* internal x */ float *alpha_i = (float *) alpha; /* internal alpha */ float t_elem; float x_elem[2]; float prod[2]; float sum[2]; float tmp[2]; /* all error calls */ if ((order != blas_rowmajor && order != blas_colmajor) || (uplo != blas_upper && uplo != blas_lower) || (trans != blas_trans && trans != blas_no_trans && trans != blas_conj_trans) || (diag != blas_non_unit_diag && diag != blas_unit_diag) || (ldt < n) || (incx == 0)) { BLAS_error(routine_name, 0, 0, NULL); } else if (n <= 0) { BLAS_error(routine_name, -4, n, NULL); } else if (incx == 0) { BLAS_error(routine_name, -9, incx, NULL); } if (trans == blas_no_trans) { if (uplo == blas_upper) { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = ldt; inc_tij = -1; } else { inc_ti = 1; inc_tij = -ldt; } } else { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -ldt; inc_tij = 1; } else { inc_ti = -1; inc_tij = ldt; } } } else { if (uplo == blas_upper) { inc_x = incx; if (order == blas_rowmajor) { inc_ti = -1; inc_tij = ldt; } else { inc_ti = -ldt; inc_tij = 1; } } else { inc_x = -incx; if (order == blas_rowmajor) { inc_ti = 1; inc_tij = -ldt; } else { inc_ti = ldt; inc_tij = -1; } } } inc_x *= 2; xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x); if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { xj = xj0; for (j = 0; j < n; j++) { x_i[xj] = 0.0; x_i[xj + 1] = 0.0; xj += inc_x; } } else { if (diag == blas_unit_diag) { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { sum[0] = sum[1] = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < (n - 1); j++) { t_elem = T_i[tij]; x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { prod[0] = x_elem[0] * t_elem; prod[1] = x_elem[1] * t_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; xj += inc_x; tij += inc_tij; } x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; sum[0] = sum[0] + x_elem[0]; sum[1] = sum[1] + x_elem[1]; if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { x_i[xj] = sum[0]; x_i[xj + 1] = sum[1]; } else { { tmp[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1]; tmp[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0]; } x_i[xj] = tmp[0]; x_i[xj + 1] = tmp[1]; } ti += inc_ti; } } else { ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti); tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij); for (i = 0; i < n; i++) { sum[0] = sum[1] = 0.0; xj = xj0; tij = ti + tij0; for (j = i; j < n; j++) { t_elem = T_i[tij]; x_elem[0] = x_i[xj]; x_elem[1] = x_i[xj + 1]; { prod[0] = x_elem[0] * t_elem; prod[1] = x_elem[1] * t_elem; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; xj += inc_x; tij += inc_tij; } if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { x_i[xj - inc_x] = sum[0]; x_i[xj - inc_x + 1] = sum[1]; } else { { tmp[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1]; tmp[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0]; } x_i[xj - inc_x] = tmp[0]; x_i[xj - inc_x + 1] = tmp[1]; } ti += inc_ti; } } } }
void BLAS_zgemv_c_c(enum blas_order_type order, enum blas_trans_type trans, int m , int n, const void *alpha, const void *a, int lda, const void *x, int incx, const void *beta, void *y, int incy) /* * Purpose * ======= * * Computes y = alpha * A * x + beta * y, where A is a general matrix. * * Arguments * ========= * * order (input) blas_order_type * Order of AP; row or column major * * trans (input) blas_trans_type * Transpose of AB; no trans, * trans, or conjugate trans * * m (input) int * Dimension of AB * * n (input) int * Dimension of AB and the length of vector x * * alpha (input) const void* * * A (input) const void* * * lda (input) int * Leading dimension of A * * x (input) const void* * * incx (input) int * The stride for vector x. * * beta (input) const void* * * y (input/output) void* * * incy (input) int * The stride for vector y. * */ { static const char routine_name[] = "BLAS_zgemv_c_c"; int i , j; int iy , jx, kx, ky; int lenx , leny; int ai , aij; int incai , incaij; const float *a_i = (float *)a; const float *x_i = (float *)x; double *y_i = (double *)y; double *alpha_i = (double *)alpha; double *beta_i = (double *)beta; float a_elem [2]; float x_elem [2]; double y_elem [2]; double prod [2]; double sum [2]; double tmp1 [2]; double tmp2 [2]; /* all error calls */ if (m < 0) BLAS_error(routine_name, -3, m, 0); else if (n <= 0) BLAS_error(routine_name, -4, n, 0); else if (incx == 0) BLAS_error(routine_name, -9, incx, 0); else if (incy == 0) BLAS_error(routine_name, -12, incy, 0); if ((order == blas_rowmajor) && (trans == blas_no_trans)) { lenx = n; leny = m; incai = lda; incaij = 1; } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) { lenx = m; leny = n; incai = 1; incaij = lda; } else if ((order == blas_colmajor) && (trans == blas_no_trans)) { lenx = n; leny = m; incai = 1; incaij = lda; } else { /* colmajor and blas_trans */ lenx = m; leny = n; incai = lda; incaij = 1; } if ((order == blas_colmajor && lda < m) || (order == blas_rowmajor && lda < n)) BLAS_error(routine_name, -7, lda, NULL); incx *= 2; incy *= 2; incai *= 2; incaij *= 2; if (incx > 0) kx = 0; else kx = (1 - lenx) * incx; if (incy > 0) ky = 0; else ky = (1 - leny) * incy; /* No extra-precision needed for alpha = 0 */ if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) { if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { iy = ky; for (i = 0; i < leny; i++) { y_i[iy] = 0.0; y_i[iy + 1] = 0.0; iy += incy; } } else if (!(beta_i[0] == 0.0 && beta_i[1] == 0.0)) { iy = ky; for (i = 0; i < leny; i++) { y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp1[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1]; tmp1[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0]; } y_i[iy] = tmp1[0]; y_i[iy + 1] = tmp1[1]; iy += incy; } } } else { if (trans == blas_conj_trans) { /* if beta = 0, we can save m multiplies: y = alpha*A*x */ if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* save m more multiplies if alpha = 1 */ if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[jx]; x_elem[1] = x_i[jx + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij; jx += incx; } y_i[iy] = sum[0]; y_i[iy + 1] = sum[1]; ai += incai; iy += incy; } } else { ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[jx]; x_elem[1] = x_i[jx + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij; jx += incx; } { tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1]; tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0]; } y_i[iy] = tmp1[0]; y_i[iy + 1] = tmp1[1]; ai += incai; iy += incy; } } } else { /* the most general form, y = alpha*A*x + beta*y */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0;; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; a_elem[1] = -a_elem[1]; x_elem[0] = x_i[jx]; x_elem[1] = x_i[jx + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij; jx += incx; } { tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1]; tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0]; } y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1]; tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0]; } tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_i[iy] = tmp1[0]; y_i[iy + 1] = tmp1[1]; ai += incai; iy += incy; } } } else { /* if beta = 0, we can save m multiplies: y = alpha*A*x */ if (beta_i[0] == 0.0 && beta_i[1] == 0.0) { /* save m more multiplies if alpha = 1 */ if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) { ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[jx]; x_elem[1] = x_i[jx + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij; jx += incx; } y_i[iy] = sum[0]; y_i[iy + 1] = sum[1]; ai += incai; iy += incy; } } else { ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[jx]; x_elem[1] = x_i[jx + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij; jx += incx; } { tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1]; tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0]; } y_i[iy] = tmp1[0]; y_i[iy + 1] = tmp1[1]; ai += incai; iy += incy; } } } else { /* the most general form, y = alpha*A*x + beta*y */ ai = 0; iy = ky; for (i = 0; i < leny; i++) { sum[0] = sum[1] = 0.0;; aij = ai; jx = kx; for (j = 0; j < lenx; j++) { a_elem[0] = a_i[aij]; a_elem[1] = a_i[aij + 1]; x_elem[0] = x_i[jx]; x_elem[1] = x_i[jx + 1]; { prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1]; prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0]; } sum[0] = sum[0] + prod[0]; sum[1] = sum[1] + prod[1]; aij += incaij; jx += incx; } { tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1]; tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0]; } y_elem[0] = y_i[iy]; y_elem[1] = y_i[iy + 1]; { tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1]; tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0]; } tmp1[0] = tmp1[0] + tmp2[0]; tmp1[1] = tmp1[1] + tmp2[1]; y_i[iy] = tmp1[0]; y_i[iy + 1] = tmp1[1]; ai += incai; iy += incy; } } } } }