Exemplo n.º 1
0
static
void
BLAS_csy_norm(enum blas_order_type order, enum blas_norm_type norm,
  enum blas_uplo_type uplo, int n, const PLASMA_Complex32_t *a, int lda, float *res) {
  int i, j; float anorm, v;
  char rname[] = "BLAS_csy_norm";

  if (order != blas_colmajor) BLAS_error( rname, -1, order, 0 );

  if (norm == blas_inf_norm) {
    anorm = 0.0;
    if (blas_upper == uplo) {
      for (i = 0; i < n; ++i) {
        v = 0.0;
        for (j = 0; j < i; ++j) {
          v += cabsf( a[j + i * lda] );
        }
        for (j = i; j < n; ++j) {
          v += cabsf( a[i + j * lda] );
        }
        if (v > anorm)
          anorm = v;
      }
    } else {
      BLAS_error( rname, -3, norm, 0 );
      return;
    }
  } else {
    BLAS_error( rname, -2, norm, 0 );
    return;
  }

  if (res) *res = anorm;
}
Exemplo n.º 2
0
static
double
BLAS_dfpinfo(enum blas_cmach_type cmach) {
  double eps = 1.0, r = 1.0, o = 1.0, b = 2.0;
  int t = 53, l = 1024, m = -1021;
  char rname[] = "BLAS_dfpinfo";

  if ((sizeof eps) == sizeof(float)) {
    t = 24;
    l = 128;
    m = -125;
  } else {
    t = 53;
    l = 1024;
    m = -1021;
  }

  /* for (i = 0; i < t; ++i) eps *= half; */
  eps = BLAS_dpow_di( b, -t );
  /* for (i = 0; i >= m; --i) r *= half; */
  r = BLAS_dpow_di( b, m-1 );

  o -= eps;
  /* for (i = 0; i < l; ++i) o *= b; */
  o = (o * BLAS_dpow_di( b, l-1 )) * b;

  switch (cmach) {
    case blas_eps: return eps;
    case blas_sfmin: return r;
    default:
      BLAS_error( rname, -1, cmach, 0 );
      break;
  }
  return 0.0;
}
Exemplo n.º 3
0
static
void
BLAS_zge_norm(enum blas_order_type order, enum blas_norm_type norm,
  int m, int n, const PLASMA_Complex64_t *a, int lda, double *res) {
  int i, j; float anorm, v;
  char rname[] = "BLAS_zge_norm";

  if (order != blas_colmajor) BLAS_error( rname, -1, order, 0 );

  if (norm == blas_frobenius_norm) {
    anorm = 0.0f;
    for (j = n; j; --j) {
      for (i = m; i; --i) {
        v = a[0];
        anorm += v * v;
        a++;
      }
      a += lda - m;
    }
    anorm = sqrt( anorm );
  } else if (norm == blas_inf_norm) {
    anorm = 0.0f;
    for (i = 0; i < m; ++i) {
      v = 0.0f;
      for (j = 0; j < n; ++j) {
        v += cabs( a[i + j * lda] );
      }
      if (v > anorm)
        anorm = v;
    }
  } else {
    BLAS_error( rname, -2, norm, 0 );
    return;
  }

  if (res) *res = anorm;
}
Exemplo n.º 4
0
void BLAS_ztpmv_d(enum blas_order_type order, enum blas_uplo_type uplo,
		  enum blas_trans_type trans, enum blas_diag_type diag,
		  int n, const void *alpha, const double *tp,
		  void *x, int incx)

/*
 * Purpose
 * =======
 *
 * Computes x = alpha * tp * x, x = alpha * tp_transpose * x,
 * or x = alpha * tp_conjugate_transpose where tp is a triangular
 * packed matrix.
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of tp; row or column major
 *
 * uplo         (input) blas_uplo_type
 *              Whether tp is upper or lower
 *
 * trans        (input) blas_trans_type
 *
 * diag         (input) blas_diag_type
 *              Whether the diagonal entries of tp are 1
 *
 * n            (input) int
 *              Dimension of tp and the length of vector x
 *
 * alpha        (input) const void*
 *
 * tp           (input) double*
 *
 * x            (input) void*
 *
 * incx         (input) int
 *              The stride for vector x.
 *
 */
{
  static const char routine_name[] = "BLAS_ztpmv_d";

  {
    int matrix_row, step, tp_index, tp_start, x_index, x_start;
    int inctp, x_index2, stride, col_index, inctp2;

    double *alpha_i = (double *) alpha;

    const double *tp_i = tp;
    double *x_i = (double *) x;
    double rowsum[2];
    double rowtmp[2];
    double result[2];
    double matval;
    double vecval[2];
    double one;


    one = 1.0;

    inctp = 1;

    incx *= 2;

    if (incx < 0)
      x_start = (-n + 1) * incx;
    else
      x_start = 0;

    if (n < 1) {
      return;
    }

    /* Check for error conditions. */
    if (order != blas_colmajor && order != blas_rowmajor) {
      BLAS_error(routine_name, -1, order, NULL);
    }
    if (uplo != blas_upper && uplo != blas_lower) {
      BLAS_error(routine_name, -2, uplo, NULL);
    }
    if (incx == 0) {
      BLAS_error(routine_name, -9, incx, NULL);
    }



    {
      if ((uplo == blas_upper &&
	   trans == blas_no_trans && order == blas_rowmajor) ||
	  (uplo == blas_lower &&
	   trans != blas_no_trans && order == blas_colmajor)) {
	tp_start = 0;
	tp_index = tp_start;
	for (matrix_row = 0; matrix_row < n; matrix_row++) {
	  x_index = x_start + incx * matrix_row;
	  x_index2 = x_index;
	  col_index = matrix_row;
	  rowsum[0] = rowsum[1] = 0.0;
	  rowtmp[0] = rowtmp[1] = 0.0;
	  result[0] = result[1] = 0.0;
	  while (col_index < n) {
	    vecval[0] = x_i[x_index];
	    vecval[1] = x_i[x_index + 1];
	    if ((diag == blas_unit_diag) && (col_index == matrix_row)) {
	      {
		rowtmp[0] = vecval[0] * one;
		rowtmp[1] = vecval[1] * one;
	      }
	    } else {
	      matval = tp_i[tp_index];
	      {
		rowtmp[0] = vecval[0] * matval;
		rowtmp[1] = vecval[1] * matval;
	      }
	    }
	    rowsum[0] = rowsum[0] + rowtmp[0];
	    rowsum[1] = rowsum[1] + rowtmp[1];
	    x_index += incx;
	    tp_index += inctp;
	    col_index++;
	  }
	  {
	    result[0] =
	      (double) rowsum[0] * alpha_i[0] -
	      (double) rowsum[1] * alpha_i[1];
	    result[1] =
	      (double) rowsum[0] * alpha_i[1] +
	      (double) rowsum[1] * alpha_i[0];
	  }
	  x_i[x_index2] = result[0];
	  x_i[x_index2 + 1] = result[1];
	}
      } else if ((uplo == blas_upper &&
		  trans == blas_no_trans && order == blas_colmajor) ||
		 (uplo == blas_lower &&
		  trans != blas_no_trans && order == blas_rowmajor)) {
	tp_start = ((n - 1) * n) / 2;
	inctp2 = n - 1;
	x_index2 = x_start;
	for (matrix_row = 0; matrix_row < n; matrix_row++, inctp2 = n - 1) {
	  x_index = x_start + incx * (n - 1);
	  tp_index = (tp_start + matrix_row) * inctp;
	  col_index = (n - 1) - matrix_row;
	  rowsum[0] = rowsum[1] = 0.0;
	  rowtmp[0] = rowtmp[1] = 0.0;
	  result[0] = result[1] = 0.0;
	  while (col_index >= 0) {
	    vecval[0] = x_i[x_index];
	    vecval[1] = x_i[x_index + 1];
	    if ((diag == blas_unit_diag) && (col_index == 0)) {
	      {
		rowtmp[0] = vecval[0] * one;
		rowtmp[1] = vecval[1] * one;
	      }
	    } else {
	      matval = tp_i[tp_index];
	      {
		rowtmp[0] = vecval[0] * matval;
		rowtmp[1] = vecval[1] * matval;
	      }
	    }
	    rowsum[0] = rowsum[0] + rowtmp[0];
	    rowsum[1] = rowsum[1] + rowtmp[1];
	    x_index -= incx;
	    tp_index -= inctp2 * inctp;
	    inctp2--;
	    col_index--;
	  }
	  {
	    result[0] =
	      (double) rowsum[0] * alpha_i[0] -
	      (double) rowsum[1] * alpha_i[1];
	    result[1] =
	      (double) rowsum[0] * alpha_i[1] +
	      (double) rowsum[1] * alpha_i[0];
	  }
	  x_i[x_index2] = result[0];
	  x_i[x_index2 + 1] = result[1];
	  x_index2 += incx;
	}
      } else if ((uplo == blas_lower &&
		  trans == blas_no_trans && order == blas_rowmajor) ||
		 (uplo == blas_upper &&
		  trans != blas_no_trans && order == blas_colmajor)) {
	tp_start = (n - 1) + ((n - 1) * n) / 2;
	tp_index = tp_start * inctp;
	x_index = x_start + (n - 1) * incx;

	for (matrix_row = n - 1; matrix_row >= 0; matrix_row--) {
	  x_index2 = x_index;
	  rowsum[0] = rowsum[1] = 0.0;
	  rowtmp[0] = rowtmp[1] = 0.0;
	  result[0] = result[1] = 0.0;
	  for (step = 0; step <= matrix_row; step++) {
	    vecval[0] = x_i[x_index2];
	    vecval[1] = x_i[x_index2 + 1];
	    if ((diag == blas_unit_diag) && (step == 0)) {
	      {
		rowtmp[0] = vecval[0] * one;
		rowtmp[1] = vecval[1] * one;
	      }
	    } else {
	      matval = tp_i[tp_index];
	      {
		rowtmp[0] = vecval[0] * matval;
		rowtmp[1] = vecval[1] * matval;
	      }
	    }
	    rowsum[0] = rowsum[0] + rowtmp[0];
	    rowsum[1] = rowsum[1] + rowtmp[1];
	    x_index2 -= incx;
	    tp_index -= inctp;
	  }
	  {
	    result[0] =
	      (double) rowsum[0] * alpha_i[0] -
	      (double) rowsum[1] * alpha_i[1];
	    result[1] =
	      (double) rowsum[0] * alpha_i[1] +
	      (double) rowsum[1] * alpha_i[0];
	  }
	  x_i[x_index] = result[0];
	  x_i[x_index + 1] = result[1];
	  x_index -= incx;
	}
      } else {
	tp_start = 0;
	x_index = x_start + (n - 1) * incx;
	for (matrix_row = n - 1; matrix_row >= 0; matrix_row--) {
	  tp_index = matrix_row * inctp;
	  x_index2 = x_start;
	  rowsum[0] = rowsum[1] = 0.0;
	  rowtmp[0] = rowtmp[1] = 0.0;
	  result[0] = result[1] = 0.0;
	  stride = n;
	  for (step = 0; step <= matrix_row; step++) {
	    vecval[0] = x_i[x_index2];
	    vecval[1] = x_i[x_index2 + 1];
	    if ((diag == blas_unit_diag) && (step == matrix_row)) {
	      {
		rowtmp[0] = vecval[0] * one;
		rowtmp[1] = vecval[1] * one;
	      }
	    } else {
	      matval = tp_i[tp_index];
	      {
		rowtmp[0] = vecval[0] * matval;
		rowtmp[1] = vecval[1] * matval;
	      }
	    }
	    rowsum[0] = rowsum[0] + rowtmp[0];
	    rowsum[1] = rowsum[1] + rowtmp[1];
	    stride--;
	    tp_index += stride * inctp;
	    x_index2 += incx;
	  }
	  {
	    result[0] =
	      (double) rowsum[0] * alpha_i[0] -
	      (double) rowsum[1] * alpha_i[1];
	    result[1] =
	      (double) rowsum[0] * alpha_i[1] +
	      (double) rowsum[1] * alpha_i[0];
	  }
	  x_i[x_index] = result[0];
	  x_i[x_index + 1] = result[1];
	  x_index -= incx;
	}
      }
    }


  }
}
Exemplo n.º 5
0
void BLAS_cdot_s_s_x(enum blas_conj_type conj, int n, const void *alpha,
		     const float *x, int incx, const void *beta,
		     const float *y, int incy,
		     void *r, enum blas_prec_type prec)

/*
 * Purpose
 * =======
 * 
 * This routine computes the inner product:
 * 
 *     r <- beta * r + alpha * SUM_{i=0, n-1} x[i] * y[i].
 * 
 * Arguments
 * =========
 *  
 * conj   (input) enum blas_conj_type
 *        When x and y are complex vectors, specifies whether vector
 *        components x[i] are used unconjugated or conjugated. 
 * 
 * n      (input) int
 *        The length of vectors x and y.
 * 
 * alpha  (input) const void*
 * 
 * x      (input) const float*
 *        Array of length n.
 * 
 * incx   (input) int
 *        The stride used to access components x[i].
 *
 * beta   (input) const void*
 *
 * y      (input) const float*
 *        Array of length n.
 *      
 * incy   (input) int
 *        The stride used to access components y[i].
 *
 * r      (input/output) void*
 * 
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 */
{
  static const char routine_name[] = "BLAS_cdot_s_s_x";

  switch (prec) {
  case blas_prec_single:{

      int i, ix = 0, iy = 0;
      float *r_i = (float *) r;
      const float *x_i = x;
      const float *y_i = y;
      float *alpha_i = (float *) alpha;
      float *beta_i = (float *) beta;
      float x_ii;
      float y_ii;
      float r_v[2];
      float prod;
      float sum;
      float tmp1[2];
      float tmp2[2];


      /* Test the input parameters. */
      if (n < 0)
	BLAS_error(routine_name, -2, n, NULL);
      else if (incx == 0)
	BLAS_error(routine_name, -5, incx, NULL);
      else if (incy == 0)
	BLAS_error(routine_name, -8, incy, NULL);

      /* Immediate return. */
      if (((beta_i[0] == 1.0 && beta_i[1] == 0.0))
	  && (n == 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0)))
	return;



      r_v[0] = r_i[0];
      r_v[1] = r_i[0 + 1];
      sum = 0.0;


      if (incx < 0)
	ix = (-n + 1) * incx;
      if (incy < 0)
	iy = (-n + 1) * incy;

      for (i = 0; i < n; ++i) {
	x_ii = x_i[ix];
	y_ii = y_i[iy];

	prod = x_ii * y_ii;	/* prod = x[i]*y[i] */
	sum = sum + prod;	/* sum = sum+prod */
	ix += incx;
	iy += incy;
      }				/* endfor */


      {
	tmp1[0] = alpha_i[0] * sum;
	tmp1[1] = alpha_i[1] * sum;
      }				/* tmp1 = sum*alpha */
      {
	tmp2[0] = r_v[0] * beta_i[0] - r_v[1] * beta_i[1];
	tmp2[1] = r_v[0] * beta_i[1] + r_v[1] * beta_i[0];
      }
      /* tmp2 = r*beta */
      tmp1[0] = tmp1[0] + tmp2[0];
      tmp1[1] = tmp1[1] + tmp2[1];	/* tmp1 = tmp1+tmp2 */
      ((float *) r)[0] = tmp1[0];
      ((float *) r)[1] = tmp1[1];	/* r = tmp1 */



      break;
    }
  case blas_prec_double:
  case blas_prec_indigenous:
    {
      int i, ix = 0, iy = 0;
      float *r_i = (float *) r;
      const float *x_i = x;
      const float *y_i = y;
      float *alpha_i = (float *) alpha;
      float *beta_i = (float *) beta;
      float x_ii;
      float y_ii;
      float r_v[2];
      double prod;
      double sum;
      double tmp1[2];
      double tmp2[2];


      /* Test the input parameters. */
      if (n < 0)
	BLAS_error(routine_name, -2, n, NULL);
      else if (incx == 0)
	BLAS_error(routine_name, -5, incx, NULL);
      else if (incy == 0)
	BLAS_error(routine_name, -8, incy, NULL);

      /* Immediate return. */
      if (((beta_i[0] == 1.0 && beta_i[1] == 0.0))
	  && (n == 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0)))
	return;



      r_v[0] = r_i[0];
      r_v[1] = r_i[0 + 1];
      sum = 0.0;


      if (incx < 0)
	ix = (-n + 1) * incx;
      if (incy < 0)
	iy = (-n + 1) * incy;

      for (i = 0; i < n; ++i) {
	x_ii = x_i[ix];
	y_ii = y_i[iy];

	prod = (double) x_ii *y_ii;	/* prod = x[i]*y[i] */
	sum = sum + prod;	/* sum = sum+prod */
	ix += incx;
	iy += incy;
      }				/* endfor */


      {
	tmp1[0] = alpha_i[0] * sum;
	tmp1[1] = alpha_i[1] * sum;
      }				/* tmp1 = sum*alpha */
      {
	tmp2[0] = (double) r_v[0] * beta_i[0] - (double) r_v[1] * beta_i[1];
	tmp2[1] = (double) r_v[0] * beta_i[1] + (double) r_v[1] * beta_i[0];
      }				/* tmp2 = r*beta */
      tmp1[0] = tmp1[0] + tmp2[0];
      tmp1[1] = tmp1[1] + tmp2[1];	/* tmp1 = tmp1+tmp2 */
      ((float *) r)[0] = tmp1[0];
      ((float *) r)[1] = tmp1[1];	/* r = tmp1 */


    }
    break;
  case blas_prec_extra:
    {
      int i, ix = 0, iy = 0;
      float *r_i = (float *) r;
      const float *x_i = x;
      const float *y_i = y;
      float *alpha_i = (float *) alpha;
      float *beta_i = (float *) beta;
      float x_ii;
      float y_ii;
      float r_v[2];
      double head_prod, tail_prod;
      double head_sum, tail_sum;
      double head_tmp1[2], tail_tmp1[2];
      double head_tmp2[2], tail_tmp2[2];
      FPU_FIX_DECL;

      /* Test the input parameters. */
      if (n < 0)
	BLAS_error(routine_name, -2, n, NULL);
      else if (incx == 0)
	BLAS_error(routine_name, -5, incx, NULL);
      else if (incy == 0)
	BLAS_error(routine_name, -8, incy, NULL);

      /* Immediate return. */
      if (((beta_i[0] == 1.0 && beta_i[1] == 0.0))
	  && (n == 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0)))
	return;

      FPU_FIX_START;

      r_v[0] = r_i[0];
      r_v[1] = r_i[0 + 1];
      head_sum = tail_sum = 0.0;


      if (incx < 0)
	ix = (-n + 1) * incx;
      if (incy < 0)
	iy = (-n + 1) * incy;

      for (i = 0; i < n; ++i) {
	x_ii = x_i[ix];
	y_ii = y_i[iy];

	head_prod = (double) x_ii *y_ii;
	tail_prod = 0.0;	/* prod = x[i]*y[i] */
	{
	  /* Compute double-double = double-double + double-double. */
	  double bv;
	  double s1, s2, t1, t2;

	  /* Add two hi words. */
	  s1 = head_sum + head_prod;
	  bv = s1 - head_sum;
	  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

	  /* Add two lo words. */
	  t1 = tail_sum + tail_prod;
	  bv = t1 - tail_sum;
	  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

	  s2 += t1;

	  /* Renormalize (s1, s2)  to  (t1, s2) */
	  t1 = s1 + s2;
	  s2 = s2 - (t1 - s1);

	  t2 += s2;

	  /* Renormalize (t1, t2)  */
	  head_sum = t1 + t2;
	  tail_sum = t2 - (head_sum - t1);
	}			/* sum = sum+prod */
	ix += incx;
	iy += incy;
      }				/* endfor */


      {
	double head_e1, tail_e1;
	double dt;
	dt = (double) alpha_i[0];
	{
	  /* Compute double-double = double-double * double. */
	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	  con = head_sum * split;
	  a11 = con - head_sum;
	  a11 = con - a11;
	  a21 = head_sum - a11;
	  con = dt * split;
	  b1 = con - dt;
	  b1 = con - b1;
	  b2 = dt - b1;

	  c11 = head_sum * dt;
	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	  c2 = tail_sum * dt;
	  t1 = c11 + c2;
	  t2 = (c2 - (t1 - c11)) + c21;

	  head_e1 = t1 + t2;
	  tail_e1 = t2 - (head_e1 - t1);
	}
	head_tmp1[0] = head_e1;
	tail_tmp1[0] = tail_e1;
	dt = (double) alpha_i[1];
	{
	  /* Compute double-double = double-double * double. */
	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	  con = head_sum * split;
	  a11 = con - head_sum;
	  a11 = con - a11;
	  a21 = head_sum - a11;
	  con = dt * split;
	  b1 = con - dt;
	  b1 = con - b1;
	  b2 = dt - b1;

	  c11 = head_sum * dt;
	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	  c2 = tail_sum * dt;
	  t1 = c11 + c2;
	  t2 = (c2 - (t1 - c11)) + c21;

	  head_e1 = t1 + t2;
	  tail_e1 = t2 - (head_e1 - t1);
	}
	head_tmp1[1] = head_e1;
	tail_tmp1[1] = tail_e1;
      }				/* tmp1 = sum*alpha */
      {
	double head_e1, tail_e1;
	double d1;
	double d2;
	/* Real part */
	d1 = (double) r_v[0] * beta_i[0];
	d2 = (double) -r_v[1] * beta_i[1];
	{
	  /* Compute double-double = double + double. */
	  double e, t1, t2;

	  /* Knuth trick. */
	  t1 = d1 + d2;
	  e = t1 - d1;
	  t2 = ((d2 - e) + (d1 - (t1 - e)));

	  /* The result is t1 + t2, after normalization. */
	  head_e1 = t1 + t2;
	  tail_e1 = t2 - (head_e1 - t1);
	}
	head_tmp2[0] = head_e1;
	tail_tmp2[0] = tail_e1;
	/* imaginary part */
	d1 = (double) r_v[0] * beta_i[1];
	d2 = (double) r_v[1] * beta_i[0];
	{
	  /* Compute double-double = double + double. */
	  double e, t1, t2;

	  /* Knuth trick. */
	  t1 = d1 + d2;
	  e = t1 - d1;
	  t2 = ((d2 - e) + (d1 - (t1 - e)));

	  /* The result is t1 + t2, after normalization. */
	  head_e1 = t1 + t2;
	  tail_e1 = t2 - (head_e1 - t1);
	}
	head_tmp2[1] = head_e1;
	tail_tmp2[1] = tail_e1;
      }				/* tmp2 = r*beta */
      {
	double head_t, tail_t;
	double head_a, tail_a;
	double head_b, tail_b;
	/* Real part */
	head_a = head_tmp1[0];
	tail_a = tail_tmp1[0];
	head_b = head_tmp2[0];
	tail_b = tail_tmp2[0];
	{
	  /* Compute double-double = double-double + double-double. */
	  double bv;
	  double s1, s2, t1, t2;

	  /* Add two hi words. */
	  s1 = head_a + head_b;
	  bv = s1 - head_a;
	  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	  /* Add two lo words. */
	  t1 = tail_a + tail_b;
	  bv = t1 - tail_a;
	  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	  s2 += t1;

	  /* Renormalize (s1, s2)  to  (t1, s2) */
	  t1 = s1 + s2;
	  s2 = s2 - (t1 - s1);

	  t2 += s2;

	  /* Renormalize (t1, t2)  */
	  head_t = t1 + t2;
	  tail_t = t2 - (head_t - t1);
	}
	head_tmp1[0] = head_t;
	tail_tmp1[0] = tail_t;
	/* Imaginary part */
	head_a = head_tmp1[1];
	tail_a = tail_tmp1[1];
	head_b = head_tmp2[1];
	tail_b = tail_tmp2[1];
	{
	  /* Compute double-double = double-double + double-double. */
	  double bv;
	  double s1, s2, t1, t2;

	  /* Add two hi words. */
	  s1 = head_a + head_b;
	  bv = s1 - head_a;
	  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	  /* Add two lo words. */
	  t1 = tail_a + tail_b;
	  bv = t1 - tail_a;
	  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	  s2 += t1;

	  /* Renormalize (s1, s2)  to  (t1, s2) */
	  t1 = s1 + s2;
	  s2 = s2 - (t1 - s1);

	  t2 += s2;

	  /* Renormalize (t1, t2)  */
	  head_t = t1 + t2;
	  tail_t = t2 - (head_t - t1);
	}
	head_tmp1[1] = head_t;
	tail_tmp1[1] = tail_t;
      }				/* tmp1 = tmp1+tmp2 */
      ((float *) r)[0] = head_tmp1[0];
      ((float *) r)[1] = head_tmp1[1];	/* r = tmp1 */

      FPU_FIX_STOP;
    }
    break;
  }
}
Exemplo n.º 6
0
void BLAS_cge_sum_mv_s_c(enum blas_order_type order, int m, int n,
			 const void *alpha, const float *a, int lda,
			 const void *x, int incx,
			 const void *beta, const float *b, int ldb,
			 void *y, int incy)

/*
 * Purpose
 * =======
 *
 * Computes y = alpha * A * x + beta * B * y, 
 *     where A, B are general matricies.
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of A; row or column major
 *
 * m            (input) int
 *              Row Dimension of A, B, length of output vector y
 *
 * n            (input) int
 *              Column Dimension of A, B and the length of vector x
 *
 * alpha        (input) const void*
 *              
 * A            (input) const float*
 *
 * lda          (input) int 
 *              Leading dimension of A
 *
 * x            (input) const void*
 * 
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) const void*
 *
 * b            (input) const float*
 *
 * ldb          (input) int 
 *              Leading dimension of B
 *
 * y            (input/output) void*
 *
 * incy         (input) int
 *              The stride for vector y.
 * 
 */
{
  /* Routine name */
  static const char routine_name[] = "BLAS_cge_sum_mv_s_c";
  int i, j;
  int xi, yi;
  int x_starti, y_starti, incxi, incyi;
  int lda_min;
  int ai;
  int incai;
  int aij;
  int incaij;
  int bi;
  int incbi;
  int bij;
  int incbij;

  const float *a_i = a;
  const float *b_i = b;
  const float *x_i = (float *) x;
  float *y_i = (float *) y;
  float *alpha_i = (float *) alpha;
  float *beta_i = (float *) beta;
  float a_elem;
  float b_elem;
  float x_elem[2];
  float prod[2];
  float sumA[2];
  float sumB[2];
  float tmp1[2];
  float tmp2[2];



  /* m is number of rows */
  /* n is number of columns */

  if (m == 0 || n == 0)
    return;


  /* all error calls */
  if (order == blas_rowmajor) {
    lda_min = n;
    incai = lda;		/* row stride */
    incbi = ldb;
    incbij = incaij = 1;	/* column stride */
  } else if (order == blas_colmajor) {
    lda_min = m;
    incai = incbi = 1;		/*row stride */
    incaij = lda;		/* column stride */
    incbij = ldb;
  } else {
    /* error, order not blas_colmajor not blas_rowmajor */
    BLAS_error(routine_name, -1, order, 0);
    return;
  }

  if (m < 0)
    BLAS_error(routine_name, -2, m, 0);
  else if (n < 0)
    BLAS_error(routine_name, -3, n, 0);
  if (lda < lda_min)
    BLAS_error(routine_name, -6, lda, 0);
  else if (ldb < lda_min)
    BLAS_error(routine_name, -11, ldb, 0);
  else if (incx == 0)
    BLAS_error(routine_name, -8, incx, 0);
  else if (incy == 0)
    BLAS_error(routine_name, -13, incy, 0);

  incxi = incx;
  incyi = incy;
  incxi *= 2;
  incyi *= 2;





  if (incxi > 0)
    x_starti = 0;
  else
    x_starti = (1 - n) * incxi;

  if (incyi > 0)
    y_starti = 0;
  else
    y_starti = (1 - m) * incyi;



  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
    if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
      /* alpha, beta are 0.0 */
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	y_i[yi] = 0.0;
	y_i[yi + 1] = 0.0;
      }
    } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
      /* alpha is 0.0, beta is 1.0 */


      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {

	sumB[0] = sumB[1] = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];

	  b_elem = b_i[bij];
	  {
	    prod[0] = x_elem[0] * b_elem;
	    prod[1] = x_elem[1] * b_elem;
	  }
	  sumB[0] = sumB[0] + prod[0];
	  sumB[1] = sumB[1] + prod[1];
	  bij += incbij;
	}
	/* now put the result into y_i */
	y_i[yi] = sumB[0];
	y_i[yi + 1] = sumB[1];

	bi += incbi;
      }
    } else {
      /* alpha is 0.0, beta not 1.0 nor 0.0 */


      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {

	sumB[0] = sumB[1] = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];

	  b_elem = b_i[bij];
	  {
	    prod[0] = x_elem[0] * b_elem;
	    prod[1] = x_elem[1] * b_elem;
	  }
	  sumB[0] = sumB[0] + prod[0];
	  sumB[1] = sumB[1] + prod[1];
	  bij += incbij;
	}
	/* now put the result into y_i */
	{
	  tmp1[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1];
	  tmp1[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0];
	}

	y_i[yi] = tmp1[0];
	y_i[yi + 1] = tmp1[1];

	bi += incbi;
      }
    }
  } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
    if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
      /* alpha is 1.0, beta is 0.0 */

      ai = 0;

      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA[0] = sumA[1] = 0.0;
	aij = ai;

	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  a_elem = a_i[aij];
	  {
	    prod[0] = x_elem[0] * a_elem;
	    prod[1] = x_elem[1] * a_elem;
	  }
	  sumA[0] = sumA[0] + prod[0];
	  sumA[1] = sumA[1] + prod[1];
	  aij += incaij;

	}
	/* now put the result into y_i */
	y_i[yi] = sumA[0];
	y_i[yi + 1] = sumA[1];
	ai += incai;

      }
    } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
      /* alpha is 1.0, beta is 1.0 */

      ai = 0;
      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA[0] = sumA[1] = 0.0;
	aij = ai;
	sumB[0] = sumB[1] = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  a_elem = a_i[aij];
	  {
	    prod[0] = x_elem[0] * a_elem;
	    prod[1] = x_elem[1] * a_elem;
	  }
	  sumA[0] = sumA[0] + prod[0];
	  sumA[1] = sumA[1] + prod[1];
	  aij += incaij;
	  b_elem = b_i[bij];
	  {
	    prod[0] = x_elem[0] * b_elem;
	    prod[1] = x_elem[1] * b_elem;
	  }
	  sumB[0] = sumB[0] + prod[0];
	  sumB[1] = sumB[1] + prod[1];
	  bij += incbij;
	}
	/* now put the result into y_i */
	tmp1[0] = sumA[0];
	tmp1[1] = sumA[1];
	tmp2[0] = sumB[0];
	tmp2[1] = sumB[1];
	tmp1[0] = tmp1[0] + tmp2[0];
	tmp1[1] = tmp1[1] + tmp2[1];
	y_i[yi] = tmp1[0];
	y_i[yi + 1] = tmp1[1];
	ai += incai;
	bi += incbi;
      }
    } else {
      /* alpha is 1.0, beta is other */

      ai = 0;
      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA[0] = sumA[1] = 0.0;
	aij = ai;
	sumB[0] = sumB[1] = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  a_elem = a_i[aij];
	  {
	    prod[0] = x_elem[0] * a_elem;
	    prod[1] = x_elem[1] * a_elem;
	  }
	  sumA[0] = sumA[0] + prod[0];
	  sumA[1] = sumA[1] + prod[1];
	  aij += incaij;
	  b_elem = b_i[bij];
	  {
	    prod[0] = x_elem[0] * b_elem;
	    prod[1] = x_elem[1] * b_elem;
	  }
	  sumB[0] = sumB[0] + prod[0];
	  sumB[1] = sumB[1] + prod[1];
	  bij += incbij;
	}
	/* now put the result into y_i */
	tmp1[0] = sumA[0];
	tmp1[1] = sumA[1];
	{
	  tmp2[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1];
	  tmp2[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0];
	}

	tmp1[0] = tmp1[0] + tmp2[0];
	tmp1[1] = tmp1[1] + tmp2[1];
	y_i[yi] = tmp1[0];
	y_i[yi + 1] = tmp1[1];
	ai += incai;
	bi += incbi;
      }
    }
  } else {
    if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
      /* alpha is other, beta is 0.0 */

      ai = 0;

      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA[0] = sumA[1] = 0.0;
	aij = ai;

	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  a_elem = a_i[aij];
	  {
	    prod[0] = x_elem[0] * a_elem;
	    prod[1] = x_elem[1] * a_elem;
	  }
	  sumA[0] = sumA[0] + prod[0];
	  sumA[1] = sumA[1] + prod[1];
	  aij += incaij;

	}
	/* now put the result into y_i */
	{
	  tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1];
	  tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0];
	}

	y_i[yi] = tmp1[0];
	y_i[yi + 1] = tmp1[1];
	ai += incai;

      }
    } else if ((beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
      /* alpha is other, beta is 1.0 */

      ai = 0;
      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA[0] = sumA[1] = 0.0;
	aij = ai;
	sumB[0] = sumB[1] = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  a_elem = a_i[aij];
	  {
	    prod[0] = x_elem[0] * a_elem;
	    prod[1] = x_elem[1] * a_elem;
	  }
	  sumA[0] = sumA[0] + prod[0];
	  sumA[1] = sumA[1] + prod[1];
	  aij += incaij;
	  b_elem = b_i[bij];
	  {
	    prod[0] = x_elem[0] * b_elem;
	    prod[1] = x_elem[1] * b_elem;
	  }
	  sumB[0] = sumB[0] + prod[0];
	  sumB[1] = sumB[1] + prod[1];
	  bij += incbij;
	}
	/* now put the result into y_i */
	{
	  tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1];
	  tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0];
	}

	tmp2[0] = sumB[0];
	tmp2[1] = sumB[1];
	tmp1[0] = tmp1[0] + tmp2[0];
	tmp1[1] = tmp1[1] + tmp2[1];
	y_i[yi] = tmp1[0];
	y_i[yi + 1] = tmp1[1];
	ai += incai;
	bi += incbi;
      }
    } else {
      /* most general form, alpha, beta are other */

      ai = 0;
      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA[0] = sumA[1] = 0.0;
	aij = ai;
	sumB[0] = sumB[1] = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  a_elem = a_i[aij];
	  {
	    prod[0] = x_elem[0] * a_elem;
	    prod[1] = x_elem[1] * a_elem;
	  }
	  sumA[0] = sumA[0] + prod[0];
	  sumA[1] = sumA[1] + prod[1];
	  aij += incaij;
	  b_elem = b_i[bij];
	  {
	    prod[0] = x_elem[0] * b_elem;
	    prod[1] = x_elem[1] * b_elem;
	  }
	  sumB[0] = sumB[0] + prod[0];
	  sumB[1] = sumB[1] + prod[1];
	  bij += incbij;
	}
	/* now put the result into y_i */
	{
	  tmp1[0] = sumA[0] * alpha_i[0] - sumA[1] * alpha_i[1];
	  tmp1[1] = sumA[0] * alpha_i[1] + sumA[1] * alpha_i[0];
	}

	{
	  tmp2[0] = sumB[0] * beta_i[0] - sumB[1] * beta_i[1];
	  tmp2[1] = sumB[0] * beta_i[1] + sumB[1] * beta_i[0];
	}

	tmp1[0] = tmp1[0] + tmp2[0];
	tmp1[1] = tmp1[1] + tmp2[1];
	y_i[yi] = tmp1[0];
	y_i[yi + 1] = tmp1[1];
	ai += incai;
	bi += incbi;
      }
    }
  }


}				/* end BLAS_cge_sum_mv_s_c */
Exemplo n.º 7
0
void BLAS_cgemv2_s_c(enum blas_order_type order, enum blas_trans_type trans,
		     int m, int n, const void *alpha, const float *a, int lda,
		     const void *head_x, const void *tail_x, int incx,
		     const void *beta, void *y, int incy)

/*
 * Purpose
 * =======
 *
 * Computes y = alpha * op(A) * head_x + alpha * op(A) * tail_x + beta * y,
 * where A is a general matrix.
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of A; row or column major
 *
 * trans        (input) blas_trans_type
 *              Transpose of A: no trans, trans, or conjugate trans
 *
 * m            (input) int
 *              Dimension of A
 *
 * n            (input) int
 *              Dimension of A and the length of vector x and z
 *
 * alpha        (input) const void*
 *              
 * A            (input) const float*
 *
 * lda          (input) int 
 *              Leading dimension of A
 *
 * head_x
 * tail_x       (input) const void*
 * 
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) const void*
 *
 * y            (input) const void*
 * 
 * incy         (input) int
 *              The stride for vector y.
 * 
 */
{
  static const char routine_name[] = "BLAS_cgemv2_s_c";

  int i, j;
  int iy, jx, kx, ky;
  int lenx, leny;
  int ai, aij;
  int incai, incaij;

  const float *a_i = a;
  const float *head_x_i = (float *) head_x;
  const float *tail_x_i = (float *) tail_x;
  float *y_i = (float *) y;
  float *alpha_i = (float *) alpha;
  float *beta_i = (float *) beta;
  float a_elem;
  float x_elem[2];
  float y_elem[2];
  float prod[2];
  float sum[2];
  float sum2[2];
  float tmp1[2];
  float tmp2[2];


  /* all error calls */
  if (m < 0)
    BLAS_error(routine_name, -3, m, 0);
  else if (n <= 0)
    BLAS_error(routine_name, -4, n, 0);
  else if (incx == 0)
    BLAS_error(routine_name, -10, incx, 0);
  else if (incy == 0)
    BLAS_error(routine_name, -13, incy, 0);

  if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
    lenx = n;
    leny = m;
    incai = lda;
    incaij = 1;
  } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) {
    lenx = m;
    leny = n;
    incai = 1;
    incaij = lda;
  } else if ((order == blas_colmajor) && (trans == blas_no_trans)) {
    lenx = n;
    leny = m;
    incai = 1;
    incaij = lda;
  } else {			/* colmajor and blas_trans */
    lenx = m;
    leny = n;
    incai = lda;
    incaij = 1;
  }

  if (lda < leny)
    BLAS_error(routine_name, -7, lda, NULL);



  incx *= 2;
  incy *= 2;



  if (incx > 0)
    kx = 0;
  else
    kx = (1 - lenx) * incx;
  if (incy > 0)
    ky = 0;
  else
    ky = (1 - leny) * incy;

  /* No extra-precision needed for alpha = 0 */
  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
    if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
      iy = ky;
      for (i = 0; i < leny; i++) {
	y_i[iy] = 0.0;
	y_i[iy + 1] = 0.0;
	iy += incy;
      }
    } else if (!(beta_i[0] == 0.0 && beta_i[1] == 0.0)) {
      iy = ky;
      for (i = 0; i < leny; i++) {
	y_elem[0] = y_i[iy];
	y_elem[1] = y_i[iy + 1];
	{
	  tmp1[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
	  tmp1[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
	}

	y_i[iy] = tmp1[0];
	y_i[iy + 1] = tmp1[1];
	iy += incy;
      }
    }
  } else {			/* alpha != 0 */

    /* if beta = 0, we can save m multiplies:
       y = alpha*A*head_x + alpha*A*tail_x  */
    if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
      if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	/* save m more multiplies if alpha = 1 */
	ai = 0;
	iy = ky;
	for (i = 0; i < leny; i++) {
	  sum[0] = sum[1] = 0.0;
	  sum2[0] = sum2[1] = 0.0;
	  aij = ai;
	  jx = kx;
	  for (j = 0; j < lenx; j++) {
	    a_elem = a_i[aij];

	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum2[0] = sum2[0] + prod[0];
	    sum2[1] = sum2[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }
	  sum[0] = sum[0] + sum2[0];
	  sum[1] = sum[1] + sum2[1];
	  y_i[iy] = sum[0];
	  y_i[iy + 1] = sum[1];
	  ai += incai;
	  iy += incy;
	}			/* end for */
      } else {			/* alpha != 1 */
	ai = 0;
	iy = ky;
	for (i = 0; i < leny; i++) {
	  sum[0] = sum[1] = 0.0;
	  sum2[0] = sum2[1] = 0.0;
	  aij = ai;
	  jx = kx;
	  for (j = 0; j < lenx; j++) {
	    a_elem = a_i[aij];

	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum2[0] = sum2[0] + prod[0];
	    sum2[1] = sum2[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }
	  {
	    tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
	    tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
	  }

	  {
	    tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1];
	    tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0];
	  }

	  tmp1[0] = tmp1[0] + tmp2[0];
	  tmp1[1] = tmp1[1] + tmp2[1];
	  y_i[iy] = tmp1[0];
	  y_i[iy + 1] = tmp1[1];
	  ai += incai;
	  iy += incy;
	}
      }
    } else {			/* beta != 0 */
      if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	/* save m multiplies if alpha = 1 */
	ai = 0;
	iy = ky;
	for (i = 0; i < leny; i++) {
	  sum[0] = sum[1] = 0.0;;
	  sum2[0] = sum2[1] = 0.0;;
	  aij = ai;
	  jx = kx;
	  for (j = 0; j < lenx; j++) {
	    a_elem = a_i[aij];

	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum2[0] = sum2[0] + prod[0];
	    sum2[1] = sum2[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }
	  sum[0] = sum[0] + sum2[0];
	  sum[1] = sum[1] + sum2[1];
	  y_elem[0] = y_i[iy];
	  y_elem[1] = y_i[iy + 1];
	  {
	    tmp1[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
	    tmp1[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
	  }

	  tmp2[0] = sum[0] + tmp1[0];
	  tmp2[1] = sum[1] + tmp1[1];
	  y_i[iy] = tmp2[0];
	  y_i[iy + 1] = tmp2[1];
	  ai += incai;
	  iy += incy;
	}
      } else {			/* alpha != 1, the most general form:
				   y = alpha*A*head_x + alpha*A*tail_x + beta*y */
	ai = 0;
	iy = ky;
	for (i = 0; i < leny; i++) {
	  sum[0] = sum[1] = 0.0;;
	  sum2[0] = sum2[1] = 0.0;;
	  aij = ai;
	  jx = kx;
	  for (j = 0; j < lenx; j++) {
	    a_elem = a_i[aij];

	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum2[0] = sum2[0] + prod[0];
	    sum2[1] = sum2[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }
	  {
	    tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
	    tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
	  }

	  {
	    tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1];
	    tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0];
	  }

	  tmp1[0] = tmp1[0] + tmp2[0];
	  tmp1[1] = tmp1[1] + tmp2[1];
	  y_elem[0] = y_i[iy];
	  y_elem[1] = y_i[iy + 1];
	  {
	    tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
	    tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
	  }

	  tmp1[0] = tmp1[0] + tmp2[0];
	  tmp1[1] = tmp1[1] + tmp2[1];
	  y_i[iy] = tmp1[0];
	  y_i[iy + 1] = tmp1[1];
	  ai += incai;
	  iy += incy;
	}
      }
    }

  }



}
Exemplo n.º 8
0
void BLAS_zhbmv_c_z(enum blas_order_type order,
		    enum blas_uplo_type uplo, int n, int k,
		    const void *alpha, const void *a, int lda,
		    const void *x, int incx, const void *beta,
		    void *y, int incy)

/* 
 * Purpose
 * =======
 *
 * This routines computes the matrix product:
 *
 *     y  <-  alpha * A * x  +  beta * y
 * 
 * where A is a hermitian band matrix.
 *
 * Arguments
 * =========
 *
 * order   (input) enum blas_order_type
 *         Storage format of input hermitian matrix A.
 * 
 * uplo    (input) enum blas_uplo_type
 *         Determines which half of matrix A (upper or lower triangle)
 *         is accessed.
 *
 * n       (input) int
 *         Dimension of A and size of vectors x, y.
 *
 * k       (input) int
 *         Number of subdiagonals ( = number of superdiagonals)
 *      
 * alpha   (input) const void*
 * 
 * a       (input) void*
 *         Matrix A.
 *
 * lda     (input) int
 *         Leading dimension of matrix A.
 *
 * x       (input) void*
 *         Vector x.
 *   
 * incx    (input) int
 *         Stride for vector x.
 *
 * beta    (input) const void*
 * 
 * y       (input/output) void*
 *         Vector y.
 *
 * incy    (input) int
 *         Stride for vector y. 
 *
 *
 *  Notes on storing a hermitian band matrix:
 * 
 *      Integers in the below arrays represent values of
 *              type complex float.
 *
 *    if we have a hermitian matrix:
 *
 *      1d  2   3   0   0
 *      2#  4d  5   6   0
 *      3#  5#  7d  8   9
 *      0   6#  8#  10d 11
 *      0   0   9#  11# 12d
 *
 *     This matrix has n == 5, and k == 2. It can be stored in the
 *      following ways:
 *
 *      Notes for the examples:
 *      Each column below represents a contigous vector.
 *      Columns are strided by lda.
 *      An asterisk (*) represents a position in the 
 *       matrix that is not used.
 *      A pound sign (#) represents the conjugated form is stored
 *      A d following an integer indicates that the imaginary
 *       part of the number is assumed to be zero.
 *      Note that the minimum lda (size of column) is 3 (k+1).
 *       lda may be arbitrarily large; an lda > 3 would mean
 *       there would be unused data at the bottom of the below
 *       columns.        
 *
 *    blas_colmajor and blas_upper:
 *      *   *   3   6   9
 *      *   2   5   8   11 
 *      1d  4d  7d  10d 12d
 *
 *
 *    blas_colmajor and blas_lower
 *      1d   4d   7d   10d  12d
 *      2#   5#   8#   11#  *
 *      3#   6#   9#   *    *
 *
 *
 *    blas_rowmajor and blas_upper 
 *      Columns here also represent contigous arrays.
 *      1d  4d  7d  10d  12d
 *      2   5   8   11   *
 *      3   6   9   *    *
 *
 *
 *    blas_rowmajor and blas_lower
 *      Columns here also represent contigous arrays.
 *      *   *   3#  6#   9#
 *      *   2#  5#  8#   11#
 *      1d  4d  7d  10d  12d
 *
 */
{
  /* Routine name */
  static const char routine_name[] = "BLAS_zhbmv_c_z";

  /* Integer Index Variables */
  int i, j;
  int xi, yi;
  int aij, astarti, x_starti, y_starti;
  int incaij, incaij2;
  int n_i;
  int maxj_first, maxj_second;

  /* Input Matrices */
  const float *a_i = (float *) a;
  const double *x_i = (double *) x;

  /* Output Vector */
  double *y_i = (double *) y;

  /* Input Scalars */
  double *alpha_i = (double *) alpha;
  double *beta_i = (double *) beta;

  /* Temporary Floating-Point Variables */
  float a_elem[2];
  double x_elem[2];
  double y_elem[2];
  double prod[2];
  double sum[2];
  double tmp1[2];
  double tmp2[2];



  /* Test for no-op */
  if (n <= 0) {
    return;
  }
  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
      && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
    return;
  }

  /* Check for error conditions. */
  if (order != blas_colmajor && order != blas_rowmajor) {
    BLAS_error(routine_name, -1, order, 0);
  }
  if (uplo != blas_upper && uplo != blas_lower) {
    BLAS_error(routine_name, -2, uplo, 0);
  }
  if (n < 0) {
    BLAS_error(routine_name, -3, n, 0);
  }
  if (k < 0 || k > n) {
    BLAS_error(routine_name, -4, k, 0);
  }
  if ((lda < k + 1) || (lda < 1)) {
    BLAS_error(routine_name, -7, lda, 0);
  }
  if (incx == 0) {
    BLAS_error(routine_name, -9, incx, 0);
  }
  if (incy == 0) {
    BLAS_error(routine_name, -12, incy, 0);
  }

  /* Set Index Parameters */
  n_i = n;

  if (((uplo == blas_upper) && (order == blas_colmajor)) ||
      ((uplo == blas_lower) && (order == blas_rowmajor))) {
    incaij = 1;			/* increment in first loop */
    incaij2 = lda - 1;		/* increment in second loop */
    astarti = k;		/* does not start on zero element */
  } else {
    incaij = lda - 1;
    incaij2 = 1;
    astarti = 0;		/* start on first element of array */
  }
  /* Adjustment to increments (if any) */
  incx *= 2;
  incy *= 2;
  astarti *= 2;
  incaij *= 2;
  incaij2 *= 2;
  if (incx < 0) {
    x_starti = (-n_i + 1) * incx;
  } else {
    x_starti = 0;
  }
  if (incy < 0) {
    y_starti = (-n_i + 1) * incy;
  } else {
    y_starti = 0;
  }



  /* alpha = 0.  In this case, just return beta * y */
  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
    for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
      y_elem[0] = y_i[yi];
      y_elem[1] = y_i[yi + 1];
      {
	tmp1[0] =
	  (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
	tmp1[1] =
	  (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
      }
      y_i[yi] = tmp1[0];
      y_i[yi + 1] = tmp1[1];
    }
  } else {
    /*  determine the loop interation counts */
    /* maj_first is number of elements done in first loop 
       (this will increase by one over each column up to a limit) */
    maxj_first = 0;

    /* maxj_second is number of elements done in 
       second loop the first time */
    maxj_second = MIN(k + 1, n_i);

    /*  determine whether we conjugate in first loop or second loop */
    if (uplo == blas_lower) {
      /*  conjugate second loop */

      /* Case alpha == 1. */
      if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {

	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	  /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
	  for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
	    sum[0] = sum[1] = 0.0;
	    for (j = 0, aij = astarti, xi = x_starti;
		 j < maxj_first; j++, aij += incaij, xi += incx) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];

	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] =
		  (double) a_elem[0] * x_elem[0] -
		  (double) a_elem[1] * x_elem[1];
		prod[1] =
		  (double) a_elem[0] * x_elem[1] +
		  (double) a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    a_elem[0] = a_i[aij];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0];
	      prod[1] = x_elem[1] * a_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    aij += incaij2;
	    xi += incx;
	    for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] =
		  (double) a_elem[0] * x_elem[0] -
		  (double) a_elem[1] * x_elem[1];
		prod[1] =
		  (double) a_elem[0] * x_elem[1] +
		  (double) a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_i[yi] = sum[0];
	    y_i[yi + 1] = sum[1];
	    if (i + 1 >= (n_i - k)) {
	      maxj_second--;
	    }
	    if (i >= k) {
	      astarti += (incaij + incaij2);
	      x_starti += incx;
	    } else {
	      maxj_first++;
	      astarti += incaij2;
	    }
	  }
	} else {
	  /* Case alpha = 1, but beta != 0. 
	     We compute  y  <--- A * x + beta * y */
	  for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
	    sum[0] = sum[1] = 0.0;

	    for (j = 0, aij = astarti, xi = x_starti;
		 j < maxj_first; j++, aij += incaij, xi += incx) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];

	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] =
		  (double) a_elem[0] * x_elem[0] -
		  (double) a_elem[1] * x_elem[1];
		prod[1] =
		  (double) a_elem[0] * x_elem[1] +
		  (double) a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    a_elem[0] = a_i[aij];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0];
	      prod[1] = x_elem[1] * a_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    aij += incaij2;
	    xi += incx;
	    for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] =
		  (double) a_elem[0] * x_elem[0] -
		  (double) a_elem[1] * x_elem[1];
		prod[1] =
		  (double) a_elem[0] * x_elem[1] +
		  (double) a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_elem[0] = y_i[yi];
	    y_elem[1] = y_i[yi + 1];
	    {
	      tmp2[0] =
		(double) y_elem[0] * beta_i[0] -
		(double) y_elem[1] * beta_i[1];
	      tmp2[1] =
		(double) y_elem[0] * beta_i[1] +
		(double) y_elem[1] * beta_i[0];
	    }
	    tmp1[0] = sum[0];
	    tmp1[1] = sum[1];
	    tmp1[0] = tmp2[0] + tmp1[0];
	    tmp1[1] = tmp2[1] + tmp1[1];
	    y_i[yi] = tmp1[0];
	    y_i[yi + 1] = tmp1[1];
	    if (i + 1 >= (n_i - k)) {
	      maxj_second--;
	    }
	    if (i >= k) {
	      astarti += (incaij + incaij2);
	      x_starti += incx;
	    } else {
	      maxj_first++;
	      astarti += incaij2;
	    }
	  }
	}
      } else {
	/* The most general form,   y <--- alpha * A * x + beta * y */
	for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
	  sum[0] = sum[1] = 0.0;

	  for (j = 0, aij = astarti, xi = x_starti;
	       j < maxj_first; j++, aij += incaij, xi += incx) {
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];

	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] =
		(double) a_elem[0] * x_elem[0] -
		(double) a_elem[1] * x_elem[1];
	      prod[1] =
		(double) a_elem[0] * x_elem[1] +
		(double) a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  a_elem[0] = a_i[aij];
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  {
	    prod[0] = x_elem[0] * a_elem[0];
	    prod[1] = x_elem[1] * a_elem[0];
	  }
	  sum[0] = sum[0] + prod[0];
	  sum[1] = sum[1] + prod[1];
	  aij += incaij2;
	  xi += incx;
	  for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    a_elem[1] = -a_elem[1];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] =
		(double) a_elem[0] * x_elem[0] -
		(double) a_elem[1] * x_elem[1];
	      prod[1] =
		(double) a_elem[0] * x_elem[1] +
		(double) a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  y_elem[0] = y_i[yi];
	  y_elem[1] = y_i[yi + 1];
	  {
	    tmp2[0] =
	      (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
	    tmp2[1] =
	      (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
	  }
	  {
	    tmp1[0] =
	      (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
	    tmp1[1] =
	      (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
	  }
	  tmp1[0] = tmp2[0] + tmp1[0];
	  tmp1[1] = tmp2[1] + tmp1[1];
	  y_i[yi] = tmp1[0];
	  y_i[yi + 1] = tmp1[1];
	  if (i + 1 >= (n_i - k)) {
	    maxj_second--;
	  }
	  if (i >= k) {
	    astarti += (incaij + incaij2);
	    x_starti += incx;
	  } else {
	    maxj_first++;
	    astarti += incaij2;
	  }
	}
      }
    } else {
      /*  conjugate first loop */

      /* Case alpha == 1. */
      if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {

	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	  /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
	  for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
	    sum[0] = sum[1] = 0.0;
	    for (j = 0, aij = astarti, xi = x_starti;
		 j < maxj_first; j++, aij += incaij, xi += incx) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] =
		  (double) a_elem[0] * x_elem[0] -
		  (double) a_elem[1] * x_elem[1];
		prod[1] =
		  (double) a_elem[0] * x_elem[1] +
		  (double) a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    a_elem[0] = a_i[aij];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0];
	      prod[1] = x_elem[1] * a_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    aij += incaij2;
	    xi += incx;
	    for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];

	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] =
		  (double) a_elem[0] * x_elem[0] -
		  (double) a_elem[1] * x_elem[1];
		prod[1] =
		  (double) a_elem[0] * x_elem[1] +
		  (double) a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_i[yi] = sum[0];
	    y_i[yi + 1] = sum[1];
	    if (i + 1 >= (n_i - k)) {
	      maxj_second--;
	    }
	    if (i >= k) {
	      astarti += (incaij + incaij2);
	      x_starti += incx;
	    } else {
	      maxj_first++;
	      astarti += incaij2;
	    }
	  }
	} else {
	  /* Case alpha = 1, but beta != 0. 
	     We compute  y  <--- A * x + beta * y */
	  for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
	    sum[0] = sum[1] = 0.0;

	    for (j = 0, aij = astarti, xi = x_starti;
		 j < maxj_first; j++, aij += incaij, xi += incx) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] =
		  (double) a_elem[0] * x_elem[0] -
		  (double) a_elem[1] * x_elem[1];
		prod[1] =
		  (double) a_elem[0] * x_elem[1] +
		  (double) a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    a_elem[0] = a_i[aij];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0];
	      prod[1] = x_elem[1] * a_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    aij += incaij2;
	    xi += incx;
	    for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];

	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] =
		  (double) a_elem[0] * x_elem[0] -
		  (double) a_elem[1] * x_elem[1];
		prod[1] =
		  (double) a_elem[0] * x_elem[1] +
		  (double) a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_elem[0] = y_i[yi];
	    y_elem[1] = y_i[yi + 1];
	    {
	      tmp2[0] =
		(double) y_elem[0] * beta_i[0] -
		(double) y_elem[1] * beta_i[1];
	      tmp2[1] =
		(double) y_elem[0] * beta_i[1] +
		(double) y_elem[1] * beta_i[0];
	    }
	    tmp1[0] = sum[0];
	    tmp1[1] = sum[1];
	    tmp1[0] = tmp2[0] + tmp1[0];
	    tmp1[1] = tmp2[1] + tmp1[1];
	    y_i[yi] = tmp1[0];
	    y_i[yi + 1] = tmp1[1];
	    if (i + 1 >= (n_i - k)) {
	      maxj_second--;
	    }
	    if (i >= k) {
	      astarti += (incaij + incaij2);
	      x_starti += incx;
	    } else {
	      maxj_first++;
	      astarti += incaij2;
	    }
	  }
	}
      } else {
	/* The most general form,   y <--- alpha * A * x + beta * y */
	for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
	  sum[0] = sum[1] = 0.0;

	  for (j = 0, aij = astarti, xi = x_starti;
	       j < maxj_first; j++, aij += incaij, xi += incx) {
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    a_elem[1] = -a_elem[1];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] =
		(double) a_elem[0] * x_elem[0] -
		(double) a_elem[1] * x_elem[1];
	      prod[1] =
		(double) a_elem[0] * x_elem[1] +
		(double) a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  a_elem[0] = a_i[aij];
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  {
	    prod[0] = x_elem[0] * a_elem[0];
	    prod[1] = x_elem[1] * a_elem[0];
	  }
	  sum[0] = sum[0] + prod[0];
	  sum[1] = sum[1] + prod[1];
	  aij += incaij2;
	  xi += incx;
	  for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];

	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] =
		(double) a_elem[0] * x_elem[0] -
		(double) a_elem[1] * x_elem[1];
	      prod[1] =
		(double) a_elem[0] * x_elem[1] +
		(double) a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  y_elem[0] = y_i[yi];
	  y_elem[1] = y_i[yi + 1];
	  {
	    tmp2[0] =
	      (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
	    tmp2[1] =
	      (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
	  }
	  {
	    tmp1[0] =
	      (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
	    tmp1[1] =
	      (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
	  }
	  tmp1[0] = tmp2[0] + tmp1[0];
	  tmp1[1] = tmp2[1] + tmp1[1];
	  y_i[yi] = tmp1[0];
	  y_i[yi + 1] = tmp1[1];
	  if (i + 1 >= (n_i - k)) {
	    maxj_second--;
	  }
	  if (i >= k) {
	    astarti += (incaij + incaij2);
	    x_starti += incx;
	  } else {
	    maxj_first++;
	    astarti += incaij2;
	  }
	}
      }
    }
  }


}				/* end BLAS_zhbmv_c_z */
Exemplo n.º 9
0
void BLAS_zsymv_d_z_x(enum blas_order_type order, enum blas_uplo_type uplo,
		      int n, const void *alpha, const double *a, int lda,
		      const void *x, int incx, const void *beta,
		      void *y, int incy, enum blas_prec_type prec)

/* 
 * Purpose
 * =======
 *
 * This routines computes the matrix product:
 *
 *     y  <-  alpha * A * x  +  beta * y
 * 
 * where A is a Symmetric matrix.
 *
 * Arguments
 * =========
 *
 * order   (input) enum blas_order_type
 *         Storage format of input symmetric matrix A.
 * 
 * uplo    (input) enum blas_uplo_type
 *         Determines which half of matrix A (upper or lower triangle)
 *         is accessed.
 *
 * n       (input) int
 *         Dimension of A and size of vectors x, y.
 *
 * alpha   (input) const void*
 * 
 * a       (input) double*
 *         Matrix A.
 *
 * lda     (input) int
 *         Leading dimension of matrix A.
 *
 * x       (input) void*
 *         Vector x.
 *   
 * incx    (input) int
 *         Stride for vector x.
 *
 * beta    (input) const void*
 * 
 * y       (input/output) void*
 *         Vector y.
 *
 * incy    (input) int
 *         Stride for vector y. 
 *
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 */
{
  /* Routine name */
  static const char routine_name[] = "BLAS_zsymv_d_z_x";
  switch (prec) {

  case blas_prec_single:
  case blas_prec_double:
  case blas_prec_indigenous:{

      /* Integer Index Variables */
      int i, k;

      int xi, yi;
      int aik, astarti, x_starti, y_starti;

      int incai;
      int incaik, incaik2;

      int n_i;

      /* Input Matrices */
      const double *a_i = a;
      const double *x_i = (double *) x;

      /* Output Vector */
      double *y_i = (double *) y;

      /* Input Scalars */
      double *alpha_i = (double *) alpha;
      double *beta_i = (double *) beta;

      /* Temporary Floating-Point Variables */
      double a_elem;
      double x_elem[2];
      double y_elem[2];
      double prod[2];
      double sum[2];
      double tmp1[2];
      double tmp2[2];



      /* Test for no-op */
      if (n <= 0) {
	return;
      }
      if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
	  && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
	return;
      }

      /* Check for error conditions. */
      if (lda < n) {
	BLAS_error(routine_name, -3, n, NULL);
      }
      if (incx == 0) {
	BLAS_error(routine_name, -8, incx, NULL);
      }
      if (incy == 0) {
	BLAS_error(routine_name, -11, incy, NULL);
      }


      /* Set Index Parameters */
      n_i = n;

      if ((order == blas_colmajor && uplo == blas_upper) ||
	  (order == blas_rowmajor && uplo == blas_lower)) {
	incai = lda;
	incaik = 1;
	incaik2 = lda;
      } else {
	incai = 1;
	incaik = lda;
	incaik2 = 1;
      }

      /* Adjustment to increments (if any) */
      incx *= 2;
      incy *= 2;



      if (incx < 0) {
	x_starti = (-n + 1) * incx;
      } else {
	x_starti = 0;
      }
      if (incy < 0) {
	y_starti = (-n + 1) * incy;
      } else {
	y_starti = 0;
      }



      /* alpha = 0.  In this case, just return beta * y */
      if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
	for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
	  y_elem[0] = y_i[yi];
	  y_elem[1] = y_i[yi + 1];
	  {
	    tmp1[0] =
	      (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
	    tmp1[1] =
	      (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
	  }
	  y_i[yi] = tmp1[0];
	  y_i[yi + 1] = tmp1[1];
	}
      } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {

	/* Case alpha == 1. */

	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	  /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
	  for (i = 0, yi = y_starti, astarti = 0;
	       i < n_i; i++, yi += incy, astarti += incai) {
	    sum[0] = sum[1] = 0.0;

	    for (k = 0, aik = astarti, xi = x_starti; k < i;
		 k++, aik += incaik, xi += incx) {
	      a_elem = a_i[aik];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = x_elem[0] * a_elem;
		prod[1] = x_elem[1] * a_elem;
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    for (; k < n_i; k++, aik += incaik2, xi += incx) {
	      a_elem = a_i[aik];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = x_elem[0] * a_elem;
		prod[1] = x_elem[1] * a_elem;
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_i[yi] = sum[0];
	    y_i[yi + 1] = sum[1];
	  }
	} else {
	  /* Case alpha = 1, but beta != 0. 
	     We compute  y  <--- A * x + beta * y */
	  for (i = 0, yi = y_starti, astarti = 0;
	       i < n_i; i++, yi += incy, astarti += incai) {
	    sum[0] = sum[1] = 0.0;

	    for (k = 0, aik = astarti, xi = x_starti;
		 k < i; k++, aik += incaik, xi += incx) {
	      a_elem = a_i[aik];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = x_elem[0] * a_elem;
		prod[1] = x_elem[1] * a_elem;
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    for (; k < n_i; k++, aik += incaik2, xi += incx) {
	      a_elem = a_i[aik];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = x_elem[0] * a_elem;
		prod[1] = x_elem[1] * a_elem;
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_elem[0] = y_i[yi];
	    y_elem[1] = y_i[yi + 1];
	    {
	      tmp2[0] =
		(double) y_elem[0] * beta_i[0] -
		(double) y_elem[1] * beta_i[1];
	      tmp2[1] =
		(double) y_elem[0] * beta_i[1] +
		(double) y_elem[1] * beta_i[0];
	    }
	    tmp1[0] = sum[0];
	    tmp1[1] = sum[1];
	    tmp1[0] = tmp2[0] + tmp1[0];
	    tmp1[1] = tmp2[1] + tmp1[1];
	    y_i[yi] = tmp1[0];
	    y_i[yi + 1] = tmp1[1];
	  }
	}

      } else {
	/* The most general form,   y <--- alpha * A * x + beta * y */
	for (i = 0, yi = y_starti, astarti = 0;
	     i < n_i; i++, yi += incy, astarti += incai) {
	  sum[0] = sum[1] = 0.0;

	  for (k = 0, aik = astarti, xi = x_starti;
	       k < i; k++, aik += incaik, xi += incx) {
	    a_elem = a_i[aik];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  for (; k < n_i; k++, aik += incaik2, xi += incx) {
	    a_elem = a_i[aik];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem;
	      prod[1] = x_elem[1] * a_elem;
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  y_elem[0] = y_i[yi];
	  y_elem[1] = y_i[yi + 1];
	  {
	    tmp2[0] =
	      (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
	    tmp2[1] =
	      (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
	  }
	  {
	    tmp1[0] =
	      (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
	    tmp1[1] =
	      (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
	  }
	  tmp1[0] = tmp2[0] + tmp1[0];
	  tmp1[1] = tmp2[1] + tmp1[1];
	  y_i[yi] = tmp1[0];
	  y_i[yi + 1] = tmp1[1];
	}
      }



      break;
    }

  case blas_prec_extra:{

      /* Integer Index Variables */
      int i, k;

      int xi, yi;
      int aik, astarti, x_starti, y_starti;

      int incai;
      int incaik, incaik2;

      int n_i;

      /* Input Matrices */
      const double *a_i = a;
      const double *x_i = (double *) x;

      /* Output Vector */
      double *y_i = (double *) y;

      /* Input Scalars */
      double *alpha_i = (double *) alpha;
      double *beta_i = (double *) beta;

      /* Temporary Floating-Point Variables */
      double a_elem;
      double x_elem[2];
      double y_elem[2];
      double head_prod[2], tail_prod[2];
      double head_sum[2], tail_sum[2];
      double head_tmp1[2], tail_tmp1[2];
      double head_tmp2[2], tail_tmp2[2];

      FPU_FIX_DECL;

      /* Test for no-op */
      if (n <= 0) {
	return;
      }
      if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
	  && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
	return;
      }

      /* Check for error conditions. */
      if (lda < n) {
	BLAS_error(routine_name, -3, n, NULL);
      }
      if (incx == 0) {
	BLAS_error(routine_name, -8, incx, NULL);
      }
      if (incy == 0) {
	BLAS_error(routine_name, -11, incy, NULL);
      }


      /* Set Index Parameters */
      n_i = n;

      if ((order == blas_colmajor && uplo == blas_upper) ||
	  (order == blas_rowmajor && uplo == blas_lower)) {
	incai = lda;
	incaik = 1;
	incaik2 = lda;
      } else {
	incai = 1;
	incaik = lda;
	incaik2 = 1;
      }

      /* Adjustment to increments (if any) */
      incx *= 2;
      incy *= 2;



      if (incx < 0) {
	x_starti = (-n + 1) * incx;
      } else {
	x_starti = 0;
      }
      if (incy < 0) {
	y_starti = (-n + 1) * incy;
      } else {
	y_starti = 0;
      }

      FPU_FIX_START;

      /* alpha = 0.  In this case, just return beta * y */
      if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
	for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
	  y_elem[0] = y_i[yi];
	  y_elem[1] = y_i[yi + 1];
	  {
	    /* Compute complex-extra = complex-double * complex-double. */
	    double head_t1, tail_t1;
	    double head_t2, tail_t2;
	    /* Real part */
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = y_elem[0] * split;
	      a1 = con - y_elem[0];
	      a1 = con - a1;
	      a2 = y_elem[0] - a1;
	      con = beta_i[0] * split;
	      b1 = con - beta_i[0];
	      b1 = con - b1;
	      b2 = beta_i[0] - b1;

	      head_t1 = y_elem[0] * beta_i[0];
	      tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = y_elem[1] * split;
	      a1 = con - y_elem[1];
	      a1 = con - a1;
	      a2 = y_elem[1] - a1;
	      con = beta_i[1] * split;
	      b1 = con - beta_i[1];
	      b1 = con - b1;
	      b2 = beta_i[1] - b1;

	      head_t2 = y_elem[1] * beta_i[1];
	      tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    head_t2 = -head_t2;
	    tail_t2 = -tail_t2;
	    {
	      /* Compute double-double = double-double + double-double. */
	      double bv;
	      double s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp1[0] = head_t1;
	    tail_tmp1[0] = tail_t1;
	    /* Imaginary part */
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = y_elem[1] * split;
	      a1 = con - y_elem[1];
	      a1 = con - a1;
	      a2 = y_elem[1] - a1;
	      con = beta_i[0] * split;
	      b1 = con - beta_i[0];
	      b1 = con - b1;
	      b2 = beta_i[0] - b1;

	      head_t1 = y_elem[1] * beta_i[0];
	      tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = y_elem[0] * split;
	      a1 = con - y_elem[0];
	      a1 = con - a1;
	      a2 = y_elem[0] - a1;
	      con = beta_i[1] * split;
	      b1 = con - beta_i[1];
	      b1 = con - b1;
	      b2 = beta_i[1] - b1;

	      head_t2 = y_elem[0] * beta_i[1];
	      tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double-double = double-double + double-double. */
	      double bv;
	      double s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp1[1] = head_t1;
	    tail_tmp1[1] = tail_t1;
	  }
	  y_i[yi] = head_tmp1[0];
	  y_i[yi + 1] = head_tmp1[1];
	}
      } else if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {

	/* Case alpha == 1. */

	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	  /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
	  for (i = 0, yi = y_starti, astarti = 0;
	       i < n_i; i++, yi += incy, astarti += incai) {
	    head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;

	    for (k = 0, aik = astarti, xi = x_starti; k < i;
		 k++, aik += incaik, xi += incx) {
	      a_elem = a_i[aik];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		/* Compute complex-extra = complex-double * real. */
		double head_t, tail_t;
		{
		  /* Compute double_double = double * double. */
		  double a1, a2, b1, b2, con;

		  con = a_elem * split;
		  a1 = con - a_elem;
		  a1 = con - a1;
		  a2 = a_elem - a1;
		  con = x_elem[0] * split;
		  b1 = con - x_elem[0];
		  b1 = con - b1;
		  b2 = x_elem[0] - b1;

		  head_t = a_elem * x_elem[0];
		  tail_t =
		    (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
		}
		head_prod[0] = head_t;
		tail_prod[0] = tail_t;
		{
		  /* Compute double_double = double * double. */
		  double a1, a2, b1, b2, con;

		  con = a_elem * split;
		  a1 = con - a_elem;
		  a1 = con - a1;
		  a2 = a_elem - a1;
		  con = x_elem[1] * split;
		  b1 = con - x_elem[1];
		  b1 = con - b1;
		  b2 = x_elem[1] - b1;

		  head_t = a_elem * x_elem[1];
		  tail_t =
		    (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
		}
		head_prod[1] = head_t;
		tail_prod[1] = tail_t;
	      }
	      {
		double head_t, tail_t;
		double head_a, tail_a;
		double head_b, tail_b;
		/* Real part */
		head_a = head_sum[0];
		tail_a = tail_sum[0];
		head_b = head_prod[0];
		tail_b = tail_prod[0];
		{
		  /* Compute double-double = double-double + double-double. */
		  double bv;
		  double s1, s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[0] = head_t;
		tail_sum[0] = tail_t;
		/* Imaginary part */
		head_a = head_sum[1];
		tail_a = tail_sum[1];
		head_b = head_prod[1];
		tail_b = tail_prod[1];
		{
		  /* Compute double-double = double-double + double-double. */
		  double bv;
		  double s1, s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[1] = head_t;
		tail_sum[1] = tail_t;
	      }
	    }
	    for (; k < n_i; k++, aik += incaik2, xi += incx) {
	      a_elem = a_i[aik];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		/* Compute complex-extra = complex-double * real. */
		double head_t, tail_t;
		{
		  /* Compute double_double = double * double. */
		  double a1, a2, b1, b2, con;

		  con = a_elem * split;
		  a1 = con - a_elem;
		  a1 = con - a1;
		  a2 = a_elem - a1;
		  con = x_elem[0] * split;
		  b1 = con - x_elem[0];
		  b1 = con - b1;
		  b2 = x_elem[0] - b1;

		  head_t = a_elem * x_elem[0];
		  tail_t =
		    (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
		}
		head_prod[0] = head_t;
		tail_prod[0] = tail_t;
		{
		  /* Compute double_double = double * double. */
		  double a1, a2, b1, b2, con;

		  con = a_elem * split;
		  a1 = con - a_elem;
		  a1 = con - a1;
		  a2 = a_elem - a1;
		  con = x_elem[1] * split;
		  b1 = con - x_elem[1];
		  b1 = con - b1;
		  b2 = x_elem[1] - b1;

		  head_t = a_elem * x_elem[1];
		  tail_t =
		    (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
		}
		head_prod[1] = head_t;
		tail_prod[1] = tail_t;
	      }
	      {
		double head_t, tail_t;
		double head_a, tail_a;
		double head_b, tail_b;
		/* Real part */
		head_a = head_sum[0];
		tail_a = tail_sum[0];
		head_b = head_prod[0];
		tail_b = tail_prod[0];
		{
		  /* Compute double-double = double-double + double-double. */
		  double bv;
		  double s1, s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[0] = head_t;
		tail_sum[0] = tail_t;
		/* Imaginary part */
		head_a = head_sum[1];
		tail_a = tail_sum[1];
		head_b = head_prod[1];
		tail_b = tail_prod[1];
		{
		  /* Compute double-double = double-double + double-double. */
		  double bv;
		  double s1, s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[1] = head_t;
		tail_sum[1] = tail_t;
	      }
	    }
	    y_i[yi] = head_sum[0];
	    y_i[yi + 1] = head_sum[1];
	  }
	} else {
	  /* Case alpha = 1, but beta != 0. 
	     We compute  y  <--- A * x + beta * y */
	  for (i = 0, yi = y_starti, astarti = 0;
	       i < n_i; i++, yi += incy, astarti += incai) {
	    head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;

	    for (k = 0, aik = astarti, xi = x_starti;
		 k < i; k++, aik += incaik, xi += incx) {
	      a_elem = a_i[aik];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		/* Compute complex-extra = complex-double * real. */
		double head_t, tail_t;
		{
		  /* Compute double_double = double * double. */
		  double a1, a2, b1, b2, con;

		  con = a_elem * split;
		  a1 = con - a_elem;
		  a1 = con - a1;
		  a2 = a_elem - a1;
		  con = x_elem[0] * split;
		  b1 = con - x_elem[0];
		  b1 = con - b1;
		  b2 = x_elem[0] - b1;

		  head_t = a_elem * x_elem[0];
		  tail_t =
		    (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
		}
		head_prod[0] = head_t;
		tail_prod[0] = tail_t;
		{
		  /* Compute double_double = double * double. */
		  double a1, a2, b1, b2, con;

		  con = a_elem * split;
		  a1 = con - a_elem;
		  a1 = con - a1;
		  a2 = a_elem - a1;
		  con = x_elem[1] * split;
		  b1 = con - x_elem[1];
		  b1 = con - b1;
		  b2 = x_elem[1] - b1;

		  head_t = a_elem * x_elem[1];
		  tail_t =
		    (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
		}
		head_prod[1] = head_t;
		tail_prod[1] = tail_t;
	      }
	      {
		double head_t, tail_t;
		double head_a, tail_a;
		double head_b, tail_b;
		/* Real part */
		head_a = head_sum[0];
		tail_a = tail_sum[0];
		head_b = head_prod[0];
		tail_b = tail_prod[0];
		{
		  /* Compute double-double = double-double + double-double. */
		  double bv;
		  double s1, s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[0] = head_t;
		tail_sum[0] = tail_t;
		/* Imaginary part */
		head_a = head_sum[1];
		tail_a = tail_sum[1];
		head_b = head_prod[1];
		tail_b = tail_prod[1];
		{
		  /* Compute double-double = double-double + double-double. */
		  double bv;
		  double s1, s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[1] = head_t;
		tail_sum[1] = tail_t;
	      }
	    }
	    for (; k < n_i; k++, aik += incaik2, xi += incx) {
	      a_elem = a_i[aik];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		/* Compute complex-extra = complex-double * real. */
		double head_t, tail_t;
		{
		  /* Compute double_double = double * double. */
		  double a1, a2, b1, b2, con;

		  con = a_elem * split;
		  a1 = con - a_elem;
		  a1 = con - a1;
		  a2 = a_elem - a1;
		  con = x_elem[0] * split;
		  b1 = con - x_elem[0];
		  b1 = con - b1;
		  b2 = x_elem[0] - b1;

		  head_t = a_elem * x_elem[0];
		  tail_t =
		    (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
		}
		head_prod[0] = head_t;
		tail_prod[0] = tail_t;
		{
		  /* Compute double_double = double * double. */
		  double a1, a2, b1, b2, con;

		  con = a_elem * split;
		  a1 = con - a_elem;
		  a1 = con - a1;
		  a2 = a_elem - a1;
		  con = x_elem[1] * split;
		  b1 = con - x_elem[1];
		  b1 = con - b1;
		  b2 = x_elem[1] - b1;

		  head_t = a_elem * x_elem[1];
		  tail_t =
		    (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
		}
		head_prod[1] = head_t;
		tail_prod[1] = tail_t;
	      }
	      {
		double head_t, tail_t;
		double head_a, tail_a;
		double head_b, tail_b;
		/* Real part */
		head_a = head_sum[0];
		tail_a = tail_sum[0];
		head_b = head_prod[0];
		tail_b = tail_prod[0];
		{
		  /* Compute double-double = double-double + double-double. */
		  double bv;
		  double s1, s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[0] = head_t;
		tail_sum[0] = tail_t;
		/* Imaginary part */
		head_a = head_sum[1];
		tail_a = tail_sum[1];
		head_b = head_prod[1];
		tail_b = tail_prod[1];
		{
		  /* Compute double-double = double-double + double-double. */
		  double bv;
		  double s1, s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[1] = head_t;
		tail_sum[1] = tail_t;
	      }
	    }
	    y_elem[0] = y_i[yi];
	    y_elem[1] = y_i[yi + 1];
	    {
	      /* Compute complex-extra = complex-double * complex-double. */
	      double head_t1, tail_t1;
	      double head_t2, tail_t2;
	      /* Real part */
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = y_elem[0] * split;
		a1 = con - y_elem[0];
		a1 = con - a1;
		a2 = y_elem[0] - a1;
		con = beta_i[0] * split;
		b1 = con - beta_i[0];
		b1 = con - b1;
		b2 = beta_i[0] - b1;

		head_t1 = y_elem[0] * beta_i[0];
		tail_t1 =
		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = y_elem[1] * split;
		a1 = con - y_elem[1];
		a1 = con - a1;
		a2 = y_elem[1] - a1;
		con = beta_i[1] * split;
		b1 = con - beta_i[1];
		b1 = con - b1;
		b2 = beta_i[1] - b1;

		head_t2 = y_elem[1] * beta_i[1];
		tail_t2 =
		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      head_t2 = -head_t2;
	      tail_t2 = -tail_t2;
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_t1 + head_t2;
		bv = s1 - head_t1;
		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_t1 + tail_t2;
		bv = t1 - tail_t1;
		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t1 = t1 + t2;
		tail_t1 = t2 - (head_t1 - t1);
	      }
	      head_tmp2[0] = head_t1;
	      tail_tmp2[0] = tail_t1;
	      /* Imaginary part */
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = y_elem[1] * split;
		a1 = con - y_elem[1];
		a1 = con - a1;
		a2 = y_elem[1] - a1;
		con = beta_i[0] * split;
		b1 = con - beta_i[0];
		b1 = con - b1;
		b2 = beta_i[0] - b1;

		head_t1 = y_elem[1] * beta_i[0];
		tail_t1 =
		  (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = y_elem[0] * split;
		a1 = con - y_elem[0];
		a1 = con - a1;
		a2 = y_elem[0] - a1;
		con = beta_i[1] * split;
		b1 = con - beta_i[1];
		b1 = con - b1;
		b2 = beta_i[1] - b1;

		head_t2 = y_elem[0] * beta_i[1];
		tail_t2 =
		  (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_t1 + head_t2;
		bv = s1 - head_t1;
		s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_t1 + tail_t2;
		bv = t1 - tail_t1;
		t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t1 = t1 + t2;
		tail_t1 = t2 - (head_t1 - t1);
	      }
	      head_tmp2[1] = head_t1;
	      tail_tmp2[1] = tail_t1;
	    }
	    head_tmp1[0] = head_sum[0];
	    tail_tmp1[0] = tail_sum[0];
	    head_tmp1[1] = head_sum[1];
	    tail_tmp1[1] = tail_sum[1];
	    {
	      double head_t, tail_t;
	      double head_a, tail_a;
	      double head_b, tail_b;
	      /* Real part */
	      head_a = head_tmp2[0];
	      tail_a = tail_tmp2[0];
	      head_b = head_tmp1[0];
	      tail_b = tail_tmp1[0];
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_tmp1[0] = head_t;
	      tail_tmp1[0] = tail_t;
	      /* Imaginary part */
	      head_a = head_tmp2[1];
	      tail_a = tail_tmp2[1];
	      head_b = head_tmp1[1];
	      tail_b = tail_tmp1[1];
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_tmp1[1] = head_t;
	      tail_tmp1[1] = tail_t;
	    }
	    y_i[yi] = head_tmp1[0];
	    y_i[yi + 1] = head_tmp1[1];
	  }
	}

      } else {
	/* The most general form,   y <--- alpha * A * x + beta * y */
	for (i = 0, yi = y_starti, astarti = 0;
	     i < n_i; i++, yi += incy, astarti += incai) {
	  head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;

	  for (k = 0, aik = astarti, xi = x_starti;
	       k < i; k++, aik += incaik, xi += incx) {
	    a_elem = a_i[aik];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      /* Compute complex-extra = complex-double * real. */
	      double head_t, tail_t;
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = a_elem * split;
		a1 = con - a_elem;
		a1 = con - a1;
		a2 = a_elem - a1;
		con = x_elem[0] * split;
		b1 = con - x_elem[0];
		b1 = con - b1;
		b2 = x_elem[0] - b1;

		head_t = a_elem * x_elem[0];
		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      head_prod[0] = head_t;
	      tail_prod[0] = tail_t;
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = a_elem * split;
		a1 = con - a_elem;
		a1 = con - a1;
		a2 = a_elem - a1;
		con = x_elem[1] * split;
		b1 = con - x_elem[1];
		b1 = con - b1;
		b2 = x_elem[1] - b1;

		head_t = a_elem * x_elem[1];
		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      head_prod[1] = head_t;
	      tail_prod[1] = tail_t;
	    }
	    {
	      double head_t, tail_t;
	      double head_a, tail_a;
	      double head_b, tail_b;
	      /* Real part */
	      head_a = head_sum[0];
	      tail_a = tail_sum[0];
	      head_b = head_prod[0];
	      tail_b = tail_prod[0];
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum[0] = head_t;
	      tail_sum[0] = tail_t;
	      /* Imaginary part */
	      head_a = head_sum[1];
	      tail_a = tail_sum[1];
	      head_b = head_prod[1];
	      tail_b = tail_prod[1];
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum[1] = head_t;
	      tail_sum[1] = tail_t;
	    }
	  }
	  for (; k < n_i; k++, aik += incaik2, xi += incx) {
	    a_elem = a_i[aik];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      /* Compute complex-extra = complex-double * real. */
	      double head_t, tail_t;
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = a_elem * split;
		a1 = con - a_elem;
		a1 = con - a1;
		a2 = a_elem - a1;
		con = x_elem[0] * split;
		b1 = con - x_elem[0];
		b1 = con - b1;
		b2 = x_elem[0] - b1;

		head_t = a_elem * x_elem[0];
		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      head_prod[0] = head_t;
	      tail_prod[0] = tail_t;
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = a_elem * split;
		a1 = con - a_elem;
		a1 = con - a1;
		a2 = a_elem - a1;
		con = x_elem[1] * split;
		b1 = con - x_elem[1];
		b1 = con - b1;
		b2 = x_elem[1] - b1;

		head_t = a_elem * x_elem[1];
		tail_t = (((a1 * b1 - head_t) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      head_prod[1] = head_t;
	      tail_prod[1] = tail_t;
	    }
	    {
	      double head_t, tail_t;
	      double head_a, tail_a;
	      double head_b, tail_b;
	      /* Real part */
	      head_a = head_sum[0];
	      tail_a = tail_sum[0];
	      head_b = head_prod[0];
	      tail_b = tail_prod[0];
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum[0] = head_t;
	      tail_sum[0] = tail_t;
	      /* Imaginary part */
	      head_a = head_sum[1];
	      tail_a = tail_sum[1];
	      head_b = head_prod[1];
	      tail_b = tail_prod[1];
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum[1] = head_t;
	      tail_sum[1] = tail_t;
	    }
	  }
	  y_elem[0] = y_i[yi];
	  y_elem[1] = y_i[yi + 1];
	  {
	    /* Compute complex-extra = complex-double * complex-double. */
	    double head_t1, tail_t1;
	    double head_t2, tail_t2;
	    /* Real part */
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = y_elem[0] * split;
	      a1 = con - y_elem[0];
	      a1 = con - a1;
	      a2 = y_elem[0] - a1;
	      con = beta_i[0] * split;
	      b1 = con - beta_i[0];
	      b1 = con - b1;
	      b2 = beta_i[0] - b1;

	      head_t1 = y_elem[0] * beta_i[0];
	      tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = y_elem[1] * split;
	      a1 = con - y_elem[1];
	      a1 = con - a1;
	      a2 = y_elem[1] - a1;
	      con = beta_i[1] * split;
	      b1 = con - beta_i[1];
	      b1 = con - b1;
	      b2 = beta_i[1] - b1;

	      head_t2 = y_elem[1] * beta_i[1];
	      tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    head_t2 = -head_t2;
	    tail_t2 = -tail_t2;
	    {
	      /* Compute double-double = double-double + double-double. */
	      double bv;
	      double s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp2[0] = head_t1;
	    tail_tmp2[0] = tail_t1;
	    /* Imaginary part */
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = y_elem[1] * split;
	      a1 = con - y_elem[1];
	      a1 = con - a1;
	      a2 = y_elem[1] - a1;
	      con = beta_i[0] * split;
	      b1 = con - beta_i[0];
	      b1 = con - b1;
	      b2 = beta_i[0] - b1;

	      head_t1 = y_elem[1] * beta_i[0];
	      tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = y_elem[0] * split;
	      a1 = con - y_elem[0];
	      a1 = con - a1;
	      a2 = y_elem[0] - a1;
	      con = beta_i[1] * split;
	      b1 = con - beta_i[1];
	      b1 = con - b1;
	      b2 = beta_i[1] - b1;

	      head_t2 = y_elem[0] * beta_i[1];
	      tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double-double = double-double + double-double. */
	      double bv;
	      double s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp2[1] = head_t1;
	    tail_tmp2[1] = tail_t1;
	  }
	  {
	    /* Compute complex-extra = complex-extra * complex-double. */
	    double head_a0, tail_a0;
	    double head_a1, tail_a1;
	    double head_t1, tail_t1;
	    double head_t2, tail_t2;
	    head_a0 = head_sum[0];
	    tail_a0 = tail_sum[0];
	    head_a1 = head_sum[1];
	    tail_a1 = tail_sum[1];
	    /* real part */
	    {
	      /* Compute double-double = double-double * double. */
	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a0 * split;
	      a11 = con - head_a0;
	      a11 = con - a11;
	      a21 = head_a0 - a11;
	      con = alpha_i[0] * split;
	      b1 = con - alpha_i[0];
	      b1 = con - b1;
	      b2 = alpha_i[0] - b1;

	      c11 = head_a0 * alpha_i[0];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a0 * alpha_i[0];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    {
	      /* Compute double-double = double-double * double. */
	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a1 * split;
	      a11 = con - head_a1;
	      a11 = con - a11;
	      a21 = head_a1 - a11;
	      con = alpha_i[1] * split;
	      b1 = con - alpha_i[1];
	      b1 = con - b1;
	      b2 = alpha_i[1] - b1;

	      c11 = head_a1 * alpha_i[1];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a1 * alpha_i[1];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t2 = t1 + t2;
	      tail_t2 = t2 - (head_t2 - t1);
	    }
	    head_t2 = -head_t2;
	    tail_t2 = -tail_t2;
	    {
	      /* Compute double-double = double-double + double-double. */
	      double bv;
	      double s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp1[0] = head_t1;
	    tail_tmp1[0] = tail_t1;
	    /* imaginary part */
	    {
	      /* Compute double-double = double-double * double. */
	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a1 * split;
	      a11 = con - head_a1;
	      a11 = con - a11;
	      a21 = head_a1 - a11;
	      con = alpha_i[0] * split;
	      b1 = con - alpha_i[0];
	      b1 = con - b1;
	      b2 = alpha_i[0] - b1;

	      c11 = head_a1 * alpha_i[0];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a1 * alpha_i[0];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    {
	      /* Compute double-double = double-double * double. */
	      double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a0 * split;
	      a11 = con - head_a0;
	      a11 = con - a11;
	      a21 = head_a0 - a11;
	      con = alpha_i[1] * split;
	      b1 = con - alpha_i[1];
	      b1 = con - b1;
	      b2 = alpha_i[1] - b1;

	      c11 = head_a0 * alpha_i[1];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a0 * alpha_i[1];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t2 = t1 + t2;
	      tail_t2 = t2 - (head_t2 - t1);
	    }
	    {
	      /* Compute double-double = double-double + double-double. */
	      double bv;
	      double s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp1[1] = head_t1;
	    tail_tmp1[1] = tail_t1;
	  }

	  {
	    double head_t, tail_t;
	    double head_a, tail_a;
	    double head_b, tail_b;
	    /* Real part */
	    head_a = head_tmp2[0];
	    tail_a = tail_tmp2[0];
	    head_b = head_tmp1[0];
	    tail_b = tail_tmp1[0];
	    {
	      /* Compute double-double = double-double + double-double. */
	      double bv;
	      double s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_a + head_b;
	      bv = s1 - head_a;
	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_a + tail_b;
	      bv = t1 - tail_a;
	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t = t1 + t2;
	      tail_t = t2 - (head_t - t1);
	    }
	    head_tmp1[0] = head_t;
	    tail_tmp1[0] = tail_t;
	    /* Imaginary part */
	    head_a = head_tmp2[1];
	    tail_a = tail_tmp2[1];
	    head_b = head_tmp1[1];
	    tail_b = tail_tmp1[1];
	    {
	      /* Compute double-double = double-double + double-double. */
	      double bv;
	      double s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_a + head_b;
	      bv = s1 - head_a;
	      s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_a + tail_b;
	      bv = t1 - tail_a;
	      t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t = t1 + t2;
	      tail_t = t2 - (head_t - t1);
	    }
	    head_tmp1[1] = head_t;
	    tail_tmp1[1] = tail_t;
	  }
	  y_i[yi] = head_tmp1[0];
	  y_i[yi + 1] = head_tmp1[1];
	}
      }

      FPU_FIX_STOP;

      break;
    }
  }
}				/* end BLAS_zsymv_d_z_x */
Exemplo n.º 10
0
void		BLAS_dsymv_s_s(enum blas_order_type order, enum blas_uplo_type uplo,
		     		int		n       , double alpha, const float *a, int lda,
		 		const		float *x, int incx, double beta,
		     		double       *y, int incy)
/*
 * Purpose
 * =======
 *
 * This routines computes the matrix product:
 *
 *     y  <-  alpha * A * x  +  beta * y
 *
 * where A is a Symmetric matrix.
 *
 * Arguments
 * =========
 *
 * order   (input) enum blas_order_type
 *         Storage format of input symmetric matrix A.
 *
 * uplo    (input) enum blas_uplo_type
 *         Determines which half of matrix A (upper or lower triangle)
 *         is accessed.
 *
 * n       (input) int
 *         Dimension of A and size of vectors x, y.
 *
 * alpha   (input) double
 *
 * a       (input) float*
 *         Matrix A.
 *
 * lda     (input) int
 *         Leading dimension of matrix A.
 *
 * x       (input) float*
 *         Vector x.
 *
 * incx    (input) int
 *         Stride for vector x.
 *
 * beta    (input) double
 *
 * y       (input/output) double*
 *         Vector y.
 *
 * incy    (input) int
 *         Stride for vector y.
 *
 */
{
  /* Routine name */
  static const char routine_name[] = "BLAS_dsymv_s_s";

  /* Integer Index Variables */
  int		  i       , k;

  int		  xi      , yi;
  int		  aik     , astarti, x_starti, y_starti;

  int		  incai;
  int		  incaik  , incaik2;

  int		  n_i;

  /* Input Matrices */
  const float    *a_i = a;
  const float    *x_i = x;

  /* Output Vector */
  double         *y_i = y;

  /* Input Scalars */
  double	  alpha_i = alpha;
  double	  beta_i = beta;

  /* Temporary Floating-Point Variables */
  float		  a_elem;
  float		  x_elem;
  double	  y_elem;
  double	  prod;
  double	  sum;
  double	  tmp1;
  double	  tmp2;



  /* Test for no-op */
  if (n <= 0) {
    return;
  }
  if (alpha_i == 0.0 && beta_i == 1.0) {
    return;
  }
  /* Check for error conditions. */
  if (lda < n) {
    BLAS_error(routine_name, -3, n, NULL);
  }
  if (incx == 0) {
    BLAS_error(routine_name, -8, incx, NULL);
  }
  if (incy == 0) {
    BLAS_error(routine_name, -11, incy, NULL);
  }
  /* Set Index Parameters */
  n_i = n;

  if ((order == blas_colmajor && uplo == blas_upper) ||
      (order == blas_rowmajor && uplo == blas_lower)) {
    incai = lda;
    incaik = 1;
    incaik2 = lda;
  } else {
    incai = 1;
    incaik = lda;
    incaik2 = 1;
  }

  /* Adjustment to increments (if any) */





  if (incx < 0) {
    x_starti = (-n + 1) * incx;
  } else {
    x_starti = 0;
  }
  if (incy < 0) {
    y_starti = (-n + 1) * incy;
  } else {
    y_starti = 0;
  }



  /* alpha = 0.  In this case, just return beta * y */
  if (alpha_i == 0.0) {
    for (i = 0, yi = y_starti;
	 i < n_i; i++, yi += incy) {
      y_elem = y_i[yi];
      tmp1 = y_elem * beta_i;
      y_i[yi] = tmp1;
    }
  } else if (alpha_i == 1.0) {

    /* Case alpha == 1. */

    if (beta_i == 0.0) {
      /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
      for (i = 0, yi = y_starti, astarti = 0;
	   i < n_i; i++, yi += incy, astarti += incai) {
	sum = 0.0;

	for (k = 0, aik = astarti, xi = x_starti; k < i;
	     k++, aik += incaik, xi += incx) {
	  a_elem = a_i[aik];
	  x_elem = x_i[xi];
	  prod = (double)a_elem *x_elem;
	  sum = sum + prod;
	}
	for (; k < n_i; k++, aik += incaik2, xi += incx) {
	  a_elem = a_i[aik];
	  x_elem = x_i[xi];
	  prod = (double)a_elem *x_elem;
	  sum = sum + prod;
	}
	y_i[yi] = sum;
      }
    } else {
      /*
       * Case alpha = 1, but beta != 0. We compute  y  <--- A * x + beta * y
       */
      for (i = 0, yi = y_starti, astarti = 0;
	   i < n_i; i++, yi += incy, astarti += incai) {
	sum = 0.0;

	for (k = 0, aik = astarti, xi = x_starti;
	     k < i; k++, aik += incaik, xi += incx) {
	  a_elem = a_i[aik];
	  x_elem = x_i[xi];
	  prod = (double)a_elem *x_elem;
	  sum = sum + prod;
	}
	for (; k < n_i; k++, aik += incaik2, xi += incx) {
	  a_elem = a_i[aik];
	  x_elem = x_i[xi];
	  prod = (double)a_elem *x_elem;
	  sum = sum + prod;
	}
	y_elem = y_i[yi];
	tmp2 = y_elem * beta_i;
	tmp1 = sum;
	tmp1 = tmp2 + tmp1;
	y_i[yi] = tmp1;
      }
    }

  } else {
    /* The most general form,   y <--- alpha * A * x + beta * y */
    for (i = 0, yi = y_starti, astarti = 0;
	 i < n_i; i++, yi += incy, astarti += incai) {
      sum = 0.0;

      for (k = 0, aik = astarti, xi = x_starti;
	   k < i; k++, aik += incaik, xi += incx) {
	a_elem = a_i[aik];
	x_elem = x_i[xi];
	prod = (double)a_elem *x_elem;
	sum = sum + prod;
      }
      for (; k < n_i; k++, aik += incaik2, xi += incx) {
	a_elem = a_i[aik];
	x_elem = x_i[xi];
	prod = (double)a_elem *x_elem;
	sum = sum + prod;
      }
      y_elem = y_i[yi];
      tmp2 = y_elem * beta_i;
      tmp1 = sum * alpha_i;
      tmp1 = tmp2 + tmp1;
      y_i[yi] = tmp1;
    }
  }



}				/* end BLAS_dsymv_s_s */
Exemplo n.º 11
0
void		BLAS_dwaxpby_s_d(int n, double alpha, const float *x, int incx,
	      		double	beta  , const double *y, int incy, double *w,
		       		int		incw)
/*
 * Purpose
 * =======
 *
 * This routine computes:
 *
 *     w <- alpha * x + beta * y
 *
 * Arguments
 * =========
 *
 * n     (input) int
 *       The length of vectors x, y, and w.
 *
 * alpha (input) double
 *
 * x     (input) const float*
 *       Array of length n.
 *
 * incx  (input) int
 *       The stride used to access components x[i].
 *
 * beta  (input) double
 *
 * y     (input) double*
 *       Array of length n.
 *
 * incy  (input) int
 *       The stride used to access components y[i].
 *
 * w     (output) double*
 *       Array of length n.
 *
 * incw  (input) int
 *       The stride used to write components w[i].
 *
 */
{
  char           *routine_name = "BLAS_dwaxpby_s_d";

  int		  i       , ix = 0, iy = 0, iw = 0;
  double         *w_i = w;
  const float    *x_i = x;
  const double   *y_i = y;
  double	  alpha_i = alpha;
  double	  beta_i = beta;
  float		  x_ii;
  double	  y_ii;
  double	  tmpx;
  double	  tmpy;



  /* Test the input parameters. */
  if (incx == 0)
    BLAS_error(routine_name, -4, incx, NULL);
  else if (incy == 0)
    BLAS_error(routine_name, -7, incy, NULL);
  else if (incw == 0)
    BLAS_error(routine_name, -9, incw, NULL);


  /* Immediate return */
  if (n <= 0) {
    return;
  }
  if (incx < 0)
    ix = (-n + 1) * incx;
  if (incy < 0)
    iy = (-n + 1) * incy;
  if (incw < 0)
    iw = (-n + 1) * incw;

  for (i = 0; i < n; ++i) {
    x_ii = x_i[ix];
    y_ii = y_i[iy];
    tmpx = alpha_i * x_ii;	/* tmpx  = alpha * x[ix] */
    tmpy = beta_i * y_ii;	/* tmpy = beta * y[iy] */
    tmpy = tmpy + tmpx;
    w_i[iw] = tmpy;
    ix += incx;
    iy += incy;
    iw += incw;
  }				/* endfor */



}
Exemplo n.º 12
0
void		BLAS_zaxpby_c_x(int n, const void *alpha, const void *x, int incx,
		      		const		void  *beta, void *y,
	       		int		incy    , enum blas_prec_type prec)
/*
 * Purpose
 * =======
 *
 * This routine computes:
 *
 *      y <- alpha * x + beta * y.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The length of vectors x and y.
 *
 * alpha     (input) const void*
 *
 * x         (input) const void*
 *           Array of length n.
 *
 * incx      (input) int
 *           The stride used to access components x[i].
 *
 * beta      (input) const void*
 *
 * y         (input) void*
 *           Array of length n.
 *
 * incy      (input) int
 *           The stride used to access components y[i].
 *
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 */
{
  static const char routine_name[] = "BLAS_zaxpby_c_x";

  switch (prec) {
  case blas_prec_single:
  case blas_prec_double:
  case blas_prec_indigenous:
    {
      int	      i    , ix = 0, iy = 0;
      const float    *x_i = (float *)x;
      double         *y_i = (double *)y;
      double         *alpha_i = (double *)alpha;
      double         *beta_i = (double *)beta;
      float	      x_ii[2];
      double	      y_ii[2];
      double	      tmpx[2];
      double	      tmpy[2];


      /* Test the input parameters. */
      if (incx == 0)
	BLAS_error(routine_name, -4, incx, NULL);
      else if (incy == 0)
	BLAS_error(routine_name, -7, incy, NULL);

      /* Immediate return */
      if (n <= 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)))
	return;



      incx *= 2;
      incy *= 2;
      if (incx < 0)
	ix = (-n + 1) * incx;
      if (incy < 0)
	iy = (-n + 1) * incy;

      for (i = 0; i < n; ++i) {
	x_ii[0] = x_i[ix];
	x_ii[1] = x_i[ix + 1];
	y_ii[0] = y_i[iy];
	y_ii[1] = y_i[iy + 1];
	{
	  tmpx[0] = (double)alpha_i[0] * x_ii[0] - (double)alpha_i[1] * x_ii[1];
	  tmpx[1] = (double)alpha_i[0] * x_ii[1] + (double)alpha_i[1] * x_ii[0];
	}			/* tmpx  = alpha * x[ix] */
	{
	  tmpy[0] = (double)beta_i[0] * y_ii[0] - (double)beta_i[1] * y_ii[1];
	  tmpy[1] = (double)beta_i[0] * y_ii[1] + (double)beta_i[1] * y_ii[0];
	}			/* tmpy = beta * y[iy] */
	tmpy[0] = tmpy[0] + tmpx[0];
	tmpy[1] = tmpy[1] + tmpx[1];
	y_i[iy] = tmpy[0];
	y_i[iy + 1] = tmpy[1];
	ix += incx;
	iy += incy;
      }				/* endfor */


    }
    break;
  case blas_prec_extra:
    {
      int	      i    , ix = 0, iy = 0;
      const float    *x_i = (float *)x;
      double         *y_i = (double *)y;
      double         *alpha_i = (double *)alpha;
      double         *beta_i = (double *)beta;
      float	      x_ii[2];
      double	      y_ii[2];
      double	      head_tmpx[2], tail_tmpx[2];
      double	      head_tmpy[2], tail_tmpy[2];
      FPU_FIX_DECL;

      /* Test the input parameters. */
      if (incx == 0)
	BLAS_error(routine_name, -4, incx, NULL);
      else if (incy == 0)
	BLAS_error(routine_name, -7, incy, NULL);

      /* Immediate return */
      if (n <= 0 || (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)))
	return;

      FPU_FIX_START;

      incx *= 2;
      incy *= 2;
      if (incx < 0)
	ix = (-n + 1) * incx;
      if (incy < 0)
	iy = (-n + 1) * incy;

      for (i = 0; i < n; ++i) {
	x_ii[0] = x_i[ix];
	x_ii[1] = x_i[ix + 1];
	y_ii[0] = y_i[iy];
	y_ii[1] = y_i[iy + 1];
	{
	  double	  cd     [2];
	  cd[0] = (double)x_ii[0];
	  cd[1] = (double)x_ii[1];
	  {
	    /* Compute complex-extra = complex-double * complex-double. */
	    double	    head_t1, tail_t1;
	    double	    head_t2, tail_t2;
	    /* Real part */
	    {
	      /* Compute double_double = double * double. */
	      double	      a1, a2, b1, b2, con;

	      con = alpha_i[0] * split;
	      a1 = con - alpha_i[0];
	      a1 = con - a1;
	      a2 = alpha_i[0] - a1;
	      con = cd[0] * split;
	      b1 = con - cd[0];
	      b1 = con - b1;
	      b2 = cd[0] - b1;

	      head_t1 = alpha_i[0] * cd[0];
	      tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double_double = double * double. */
	      double	      a1, a2, b1, b2, con;

	      con = alpha_i[1] * split;
	      a1 = con - alpha_i[1];
	      a1 = con - a1;
	      a2 = alpha_i[1] - a1;
	      con = cd[1] * split;
	      b1 = con - cd[1];
	      b1 = con - b1;
	      b2 = cd[1] - b1;

	      head_t2 = alpha_i[1] * cd[1];
	      tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    head_t2 = -head_t2;
	    tail_t2 = -tail_t2;
	    {
	      /* Compute double-double = double-double + double-double. */
	      double	      bv;
	      double	      s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmpx[0] = head_t1;
	    tail_tmpx[0] = tail_t1;
	    /* Imaginary part */
	    {
	      /* Compute double_double = double * double. */
	      double	      a1, a2, b1, b2, con;

	      con = alpha_i[1] * split;
	      a1 = con - alpha_i[1];
	      a1 = con - a1;
	      a2 = alpha_i[1] - a1;
	      con = cd[0] * split;
	      b1 = con - cd[0];
	      b1 = con - b1;
	      b2 = cd[0] - b1;

	      head_t1 = alpha_i[1] * cd[0];
	      tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double_double = double * double. */
	      double	      a1, a2, b1, b2, con;

	      con = alpha_i[0] * split;
	      a1 = con - alpha_i[0];
	      a1 = con - a1;
	      a2 = alpha_i[0] - a1;
	      con = cd[1] * split;
	      b1 = con - cd[1];
	      b1 = con - b1;
	      b2 = cd[1] - b1;

	      head_t2 = alpha_i[0] * cd[1];
	      tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double-double = double-double + double-double. */
	      double	      bv;
	      double	      s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmpx[1] = head_t1;
	    tail_tmpx[1] = tail_t1;
	  }
	}			/* tmpx  = alpha * x[ix] */
	{
	  /* Compute complex-extra = complex-double * complex-double. */
	  double	  head_t1, tail_t1;
	  double	  head_t2, tail_t2;
	  /* Real part */
	  {
	    /* Compute double_double = double * double. */
	    double	    a1  , a2, b1, b2, con;

	    con = beta_i[0] * split;
	    a1 = con - beta_i[0];
	    a1 = con - a1;
	    a2 = beta_i[0] - a1;
	    con = y_ii[0] * split;
	    b1 = con - y_ii[0];
	    b1 = con - b1;
	    b2 = y_ii[0] - b1;

	    head_t1 = beta_i[0] * y_ii[0];
	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	  }
	  {
	    /* Compute double_double = double * double. */
	    double	    a1  , a2, b1, b2, con;

	    con = beta_i[1] * split;
	    a1 = con - beta_i[1];
	    a1 = con - a1;
	    a2 = beta_i[1] - a1;
	    con = y_ii[1] * split;
	    b1 = con - y_ii[1];
	    b1 = con - b1;
	    b2 = y_ii[1] - b1;

	    head_t2 = beta_i[1] * y_ii[1];
	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	  }
	  head_t2 = -head_t2;
	  tail_t2 = -tail_t2;
	  {
	    /* Compute double-double = double-double + double-double. */
	    double	    bv;
	    double	    s1  , s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_t1 + head_t2;
	    bv = s1 - head_t1;
	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_t1 + tail_t2;
	    bv = t1 - tail_t1;
	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    head_t1 = t1 + t2;
	    tail_t1 = t2 - (head_t1 - t1);
	  }
	  head_tmpy[0] = head_t1;
	  tail_tmpy[0] = tail_t1;
	  /* Imaginary part */
	  {
	    /* Compute double_double = double * double. */
	    double	    a1  , a2, b1, b2, con;

	    con = beta_i[1] * split;
	    a1 = con - beta_i[1];
	    a1 = con - a1;
	    a2 = beta_i[1] - a1;
	    con = y_ii[0] * split;
	    b1 = con - y_ii[0];
	    b1 = con - b1;
	    b2 = y_ii[0] - b1;

	    head_t1 = beta_i[1] * y_ii[0];
	    tail_t1 = (((a1 * b1 - head_t1) + a1 * b2) + a2 * b1) + a2 * b2;
	  }
	  {
	    /* Compute double_double = double * double. */
	    double	    a1  , a2, b1, b2, con;

	    con = beta_i[0] * split;
	    a1 = con - beta_i[0];
	    a1 = con - a1;
	    a2 = beta_i[0] - a1;
	    con = y_ii[1] * split;
	    b1 = con - y_ii[1];
	    b1 = con - b1;
	    b2 = y_ii[1] - b1;

	    head_t2 = beta_i[0] * y_ii[1];
	    tail_t2 = (((a1 * b1 - head_t2) + a1 * b2) + a2 * b1) + a2 * b2;
	  }
	  {
	    /* Compute double-double = double-double + double-double. */
	    double	    bv;
	    double	    s1  , s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_t1 + head_t2;
	    bv = s1 - head_t1;
	    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_t1 + tail_t2;
	    bv = t1 - tail_t1;
	    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    head_t1 = t1 + t2;
	    tail_t1 = t2 - (head_t1 - t1);
	  }
	  head_tmpy[1] = head_t1;
	  tail_tmpy[1] = tail_t1;
	}			/* tmpy = beta * y[iy] */
	{
	  double	  head_t, tail_t;
	  double	  head_a, tail_a;
	  double	  head_b, tail_b;
	  /* Real part */
	  head_a = head_tmpy[0];
	  tail_a = tail_tmpy[0];
	  head_b = head_tmpx[0];
	  tail_b = tail_tmpx[0];
	  {
	    /* Compute double-double = double-double + double-double. */
	    double	    bv;
	    double	    s1  , s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_a + head_b;
	    bv = s1 - head_a;
	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_a + tail_b;
	    bv = t1 - tail_a;
	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    head_t = t1 + t2;
	    tail_t = t2 - (head_t - t1);
	  }
	  head_tmpy[0] = head_t;
	  tail_tmpy[0] = tail_t;
	  /* Imaginary part */
	  head_a = head_tmpy[1];
	  tail_a = tail_tmpy[1];
	  head_b = head_tmpx[1];
	  tail_b = tail_tmpx[1];
	  {
	    /* Compute double-double = double-double + double-double. */
	    double	    bv;
	    double	    s1  , s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_a + head_b;
	    bv = s1 - head_a;
	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_a + tail_b;
	    bv = t1 - tail_a;
	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    head_t = t1 + t2;
	    tail_t = t2 - (head_t - t1);
	  }
	  head_tmpy[1] = head_t;
	  tail_tmpy[1] = tail_t;
	}
	y_i[iy] = head_tmpy[0];
	y_i[iy + 1] = head_tmpy[1];
	ix += incx;
	iy += incy;
      }				/* endfor */

      FPU_FIX_STOP;
    }
    break;
  }
}				/* end BLAS_zaxpby_c_x */
Exemplo n.º 13
0
void BLAS_cgbmv_c_s(enum blas_order_type order, enum blas_trans_type trans,
		    int m, int n, int kl, int ku, const void *alpha,
		    const void *a, int lda, const float *x, int incx,
		    const void *beta, void *y, int incy)

/*           
 * Purpose
 * =======
 *
 *  gbmv computes y = alpha * A * x + beta * y, where 
 *
 *  A is a m x n banded matrix
 *  x is a n x 1 vector
 *  y is a m x 1 vector
 *  alpha and beta are scalars 
 *
 *   
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of AP; row or column major
 *
 * trans        (input) blas_trans_type
 *              Transpose of AB; no trans, 
 *              trans, or conjugate trans
 *
 * m            (input) int
 *              Dimension of AB
 *
 * n            (input) int
 *              Dimension of AB and the length of vector x
 *
 * kl           (input) int 
 *              Number of lower diagnols of AB
 *
 * ku           (input) int
 *              Number of upper diagnols of AB
 *
 * alpha        (input) const void*
 *              
 * AB           (input) void*
 *
 * lda          (input) int 
 *              Leading dimension of AB
 *              lda >= ku + kl + 1
 *
 * x            (input) float*
 * 
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) const void*
 *
 * y            (input/output) void*
 *
 * incy         (input) int
 *              The stride for vector y.
 * 
 *
 * LOCAL VARIABLES 
 * ===============
 * 
 *  As an example, these variables are described on the mxn, column 
 *  major, banded matrix described in section 2.2.3 of the specification  
 *
 *  astart      indexes first element in A where computation begins
 *
 *  incai1      indexes first element in row where row is less than lbound
 * 
 *  incai2      indexes first element in row where row exceeds lbound
 *   
 *  lbound      denotes the number of rows before  first element shifts 
 *
 *  rbound      denotes the columns where there is blank space
 *   
 *  ra          index of the rightmost element for a given row
 *  
 *  la          index of leftmost  elements for a given row
 *
 *  ra - la     width of a row
 *
 *                        rbound 
 *            la   ra    ____|_____ 
 *             |    |   |          |
 *         |  a00  a01   *    *   *
 * lbound -|  a10  a11  a12   *   *
 *         |  a20  a21  a22  a23  *
 *             *   a31  a32  a33 a34
 *             *    *   a42  a43 a44
 *
 *  Varations on order and transpose have been implemented by modifying these
 *  local variables. 
 *
 */
{
  static const char routine_name[] = "BLAS_cgbmv_c_s";

  int ky, iy, kx, jx, j, i, rbound, lbound, ra, la, lenx, leny;
  int incaij, aij, incai1, incai2, astart, ai;
  float *y_i = (float *) y;
  const float *a_i = (float *) a;
  const float *x_i = x;
  float *alpha_i = (float *) alpha;
  float *beta_i = (float *) beta;
  float tmp1[2];
  float tmp2[2];
  float result[2];
  float sum[2];
  float prod[2];
  float a_elem[2];
  float x_elem;
  float y_elem[2];


  if (order != blas_colmajor && order != blas_rowmajor)
    BLAS_error(routine_name, -1, order, NULL);
  if (trans != blas_no_trans &&
      trans != blas_trans && trans != blas_conj_trans) {
    BLAS_error(routine_name, -2, trans, NULL);
  }
  if (m < 0)
    BLAS_error(routine_name, -3, m, NULL);
  if (n < 0)
    BLAS_error(routine_name, -4, n, NULL);
  if (kl < 0 || kl >= m)
    BLAS_error(routine_name, -5, kl, NULL);
  if (ku < 0 || ku >= n)
    BLAS_error(routine_name, -6, ku, NULL);
  if (lda < kl + ku + 1)
    BLAS_error(routine_name, -9, lda, NULL);
  if (incx == 0)
    BLAS_error(routine_name, -11, incx, NULL);
  if (incy == 0)
    BLAS_error(routine_name, -14, incy, NULL);

  if ((m == 0) || (n == 0) ||
      (((alpha_i[0] == 0.0 && alpha_i[1] == 0.0)
	&& ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))))
    return;

  if (trans == blas_no_trans) {
    lenx = n;
    leny = m;
  } else {			/* change back */
    lenx = m;
    leny = n;
  }

  if (incx < 0) {
    kx = -(lenx - 1) * incx;
  } else {
    kx = 0;
  }

  if (incy < 0) {
    ky = -(leny - 1) * incy;
  } else {
    ky = 0;
  }



  /* if alpha = 0, return y = y*beta */
  if ((order == blas_colmajor) && (trans == blas_no_trans)) {
    astart = ku;
    incai1 = 1;
    incai2 = lda;
    incaij = lda - 1;
    lbound = kl;
    rbound = n - ku - 1;
    ra = ku;
  } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
    astart = ku;
    incai1 = lda - 1;
    incai2 = lda;
    incaij = 1;
    lbound = ku;
    rbound = m - kl - 1;
    ra = kl;
  } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
    astart = kl;
    incai1 = lda - 1;
    incai2 = lda;
    incaij = 1;
    lbound = kl;
    rbound = n - ku - 1;
    ra = ku;
  } else {			/* rowmajor and blas_trans */
    astart = kl;
    incai1 = 1;
    incai2 = lda;
    incaij = lda - 1;
    lbound = ku;
    rbound = m - kl - 1;
    ra = kl;
  }

  incy *= 2;
  incaij *= 2;
  incai1 *= 2;
  incai2 *= 2;
  astart *= 2;
  ky *= 2;


  la = 0;
  ai = astart;
  iy = ky;
  for (i = 0; i < leny; i++) {
    sum[0] = sum[1] = 0.0;
    aij = ai;
    jx = kx;
    if (trans != blas_conj_trans) {
      for (j = ra - la; j >= 0; j--) {
	x_elem = x_i[jx];
	a_elem[0] = a_i[aij];
	a_elem[1] = a_i[aij + 1];
	{
	  prod[0] = a_elem[0] * x_elem;
	  prod[1] = a_elem[1] * x_elem;
	}
	sum[0] = sum[0] + prod[0];
	sum[1] = sum[1] + prod[1];
	aij += incaij;
	jx += incx;
      }

    } else {
      for (j = ra - la; j >= 0; j--) {
	x_elem = x_i[jx];
	a_elem[0] = a_i[aij];
	a_elem[1] = a_i[aij + 1];
	a_elem[1] = -a_elem[1];
	{
	  prod[0] = a_elem[0] * x_elem;
	  prod[1] = a_elem[1] * x_elem;
	}
	sum[0] = sum[0] + prod[0];
	sum[1] = sum[1] + prod[1];
	aij += incaij;
	jx += incx;
      }
    }

    {
      tmp1[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
      tmp1[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
    }

    y_elem[0] = y_i[iy];
    y_elem[1] = y_i[iy + 1];
    {
      tmp2[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
      tmp2[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
    }

    result[0] = tmp1[0] + tmp2[0];
    result[1] = tmp1[1] + tmp2[1];
    y_i[iy] = result[0];
    y_i[iy + 1] = result[1];
    iy += incy;
    if (i >= lbound) {
      kx += incx;
      ai += incai2;
      la++;
    } else {
      ai += incai1;
    }
    if (i < rbound) {
      ra++;
    }
  }



}				/* end GEMV_NAME(c, c, s, ) */
Exemplo n.º 14
0
void		BLAS_ctrmv_s_x(enum blas_order_type order, enum blas_uplo_type uplo,
		     		enum		blas_trans_type trans, enum blas_diag_type diag, int n,
	   		const		void  *alpha, const float *T, int ldt,
	     		void         *x, int incx, enum blas_prec_type prec)
/*
 * Purpose
 * =======
 *
 * Computes x <-- alpha * T * x, where T is a triangular matrix.
 *
 * Arguments
 * =========
 *
 * order  (input) enum blas_order_type
 *        column major, row major
 *
 * uplo   (input) enum blas_uplo_type
 *        upper, lower
 *
 * trans  (input) enum blas_trans_type
 *        no trans, trans, conj trans
 *
 * diag   (input) enum blas_diag_type
 *        unit, non unit
 *
 * n      (input) int
 *        the dimension of T
 *
 * alpha  (input) const void*
 *
 * T      (input) float*
 *        Triangular matrix
 *
 * ldt    (input) int
 *        Leading dimension of T
 *
 * x      (input) const void*
 *    Array of length n.
 *
 * incx   (input) int
 *     The stride used to access components x[i].
 *
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 */
{
  static const char routine_name[] = "BLAS_ctrmv_s_x";

  switch (prec) {
  case blas_prec_single:{

      int	      i    , j;	/* used to idx matrix */
      int	      xj   , xj0;
      int	      ti   , tij, tij0;

      int	      inc_ti, inc_tij;
      int	      inc_x;

      const float    *T_i = T;	/* internal matrix T */
      float          *x_i = (float *)x;	/* internal x */
      float          *alpha_i = (float *)alpha;	/* internal alpha */

      float	      t_elem;
      float	      x_elem[2];
      float	      prod[2];
      float	      sum [2];
      float	      tmp [2];



      /* all error calls */
      if ((order != blas_rowmajor && order != blas_colmajor) ||
	  (uplo != blas_upper && uplo != blas_lower) ||
	  (trans != blas_trans &&
	   trans != blas_no_trans &&
	   trans != blas_conj_trans) ||
	  (diag != blas_non_unit_diag && diag != blas_unit_diag) ||
	  (ldt < n) ||
	  (incx == 0)) {
	BLAS_error(routine_name, 0, 0, NULL);
      } else if (n <= 0) {
	BLAS_error(routine_name, -4, n, NULL);
      } else if (incx == 0) {
	BLAS_error(routine_name, -9, incx, NULL);
      }
      if (trans == blas_no_trans) {
	if (uplo == blas_upper) {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = ldt;
	    inc_tij = -1;
	  } else {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  }
	} else {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  } else {
	    inc_ti = -1;
	    inc_tij = ldt;
	  }
	}
      } else {
	if (uplo == blas_upper) {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -1;
	    inc_tij = ldt;
	  } else {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  }
	} else {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  } else {
	    inc_ti = ldt;
	    inc_tij = -1;
	  }
	}
      }



      inc_x *= 2;

      xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x);
      if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
	xj = xj0;
	for (j = 0; j < n; j++) {
	  x_i[xj] = 0.0;
	  x_i[xj + 1] = 0.0;
	  xj += inc_x;
	}
      } else {

	if (diag == blas_unit_diag) {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    sum[0] = sum[1] = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < (n - 1); j++) {

	      t_elem = T_i[tij];

	      x_elem[0] = x_i[xj];
	      x_elem[1] = x_i[xj + 1];
	      {
		prod[0] = x_elem[0] * t_elem;
		prod[1] = x_elem[1] * t_elem;
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    x_elem[0] = x_i[xj];
	    x_elem[1] = x_i[xj + 1];
	    sum[0] = sum[0] + x_elem[0];
	    sum[1] = sum[1] + x_elem[1];

	    if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	      x_i[xj] = sum[0];
	      x_i[xj + 1] = sum[1];
	    } else {
	      {
		tmp[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
		tmp[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
	      }

	      x_i[xj] = tmp[0];
	      x_i[xj + 1] = tmp[1];
	    }

	    ti += inc_ti;
	  }

	} else {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    sum[0] = sum[1] = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < n; j++) {

	      t_elem = T_i[tij];

	      x_elem[0] = x_i[xj];
	      x_elem[1] = x_i[xj + 1];
	      {
		prod[0] = x_elem[0] * t_elem;
		prod[1] = x_elem[1] * t_elem;
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	      x_i[xj - inc_x] = sum[0];
	      x_i[xj - inc_x + 1] = sum[1];
	    } else {
	      {
		tmp[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
		tmp[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
	      }

	      x_i[xj - inc_x] = tmp[0];
	      x_i[xj - inc_x + 1] = tmp[1];
	    }

	    ti += inc_ti;
	  }

	}

      }



      break;
    }
  case blas_prec_double:
  case blas_prec_indigenous:{

      int	      i    , j;	/* used to idx matrix */
      int	      xj   , xj0;
      int	      ti   , tij, tij0;

      int	      inc_ti, inc_tij;
      int	      inc_x;

      const float    *T_i = T;	/* internal matrix T */
      float          *x_i = (float *)x;	/* internal x */
      float          *alpha_i = (float *)alpha;	/* internal alpha */

      float	      t_elem;
      float	      x_elem[2];
      double	      prod[2];
      double	      sum[2];
      double	      tmp[2];



      /* all error calls */
      if ((order != blas_rowmajor && order != blas_colmajor) ||
	  (uplo != blas_upper && uplo != blas_lower) ||
	  (trans != blas_trans &&
	   trans != blas_no_trans &&
	   trans != blas_conj_trans) ||
	  (diag != blas_non_unit_diag && diag != blas_unit_diag) ||
	  (ldt < n) ||
	  (incx == 0)) {
	BLAS_error(routine_name, 0, 0, NULL);
      } else if (n <= 0) {
	BLAS_error(routine_name, -4, n, NULL);
      } else if (incx == 0) {
	BLAS_error(routine_name, -9, incx, NULL);
      }
      if (trans == blas_no_trans) {
	if (uplo == blas_upper) {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = ldt;
	    inc_tij = -1;
	  } else {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  }
	} else {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  } else {
	    inc_ti = -1;
	    inc_tij = ldt;
	  }
	}
      } else {
	if (uplo == blas_upper) {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -1;
	    inc_tij = ldt;
	  } else {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  }
	} else {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  } else {
	    inc_ti = ldt;
	    inc_tij = -1;
	  }
	}
      }



      inc_x *= 2;

      xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x);
      if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
	xj = xj0;
	for (j = 0; j < n; j++) {
	  x_i[xj] = 0.0;
	  x_i[xj + 1] = 0.0;
	  xj += inc_x;
	}
      } else {

	if (diag == blas_unit_diag) {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    sum[0] = sum[1] = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < (n - 1); j++) {

	      t_elem = T_i[tij];

	      x_elem[0] = x_i[xj];
	      x_elem[1] = x_i[xj + 1];
	      {
		prod[0] = (double)x_elem[0] * t_elem;
		prod[1] = (double)x_elem[1] * t_elem;
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    x_elem[0] = x_i[xj];
	    x_elem[1] = x_i[xj + 1];
	    sum[0] = sum[0] + x_elem[0];
	    sum[1] = sum[1] + x_elem[1];

	    if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	      x_i[xj] = sum[0];
	      x_i[xj + 1] = sum[1];
	    } else {
	      {
		tmp[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1];
		tmp[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0];
	      }
	      x_i[xj] = tmp[0];
	      x_i[xj + 1] = tmp[1];
	    }

	    ti += inc_ti;
	  }

	} else {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    sum[0] = sum[1] = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < n; j++) {

	      t_elem = T_i[tij];

	      x_elem[0] = x_i[xj];
	      x_elem[1] = x_i[xj + 1];
	      {
		prod[0] = (double)x_elem[0] * t_elem;
		prod[1] = (double)x_elem[1] * t_elem;
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	      x_i[xj - inc_x] = sum[0];
	      x_i[xj - inc_x + 1] = sum[1];
	    } else {
	      {
		tmp[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1];
		tmp[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0];
	      }
	      x_i[xj - inc_x] = tmp[0];
	      x_i[xj - inc_x + 1] = tmp[1];
	    }

	    ti += inc_ti;
	  }

	}

      }



      break;
    }

  case blas_prec_extra:{

      int	      i    , j;	/* used to idx matrix */
      int	      xj   , xj0;
      int	      ti   , tij, tij0;

      int	      inc_ti, inc_tij;
      int	      inc_x;

      const float    *T_i = T;	/* internal matrix T */
      float          *x_i = (float *)x;	/* internal x */
      float          *alpha_i = (float *)alpha;	/* internal alpha */

      float	      t_elem;
      float	      x_elem[2];
      double	      head_prod[2], tail_prod[2];
      double	      head_sum[2], tail_sum[2];
      double	      head_tmp[2], tail_tmp[2];

      FPU_FIX_DECL;
      FPU_FIX_START;

      /* all error calls */
      if ((order != blas_rowmajor && order != blas_colmajor) ||
	  (uplo != blas_upper && uplo != blas_lower) ||
	  (trans != blas_trans &&
	   trans != blas_no_trans &&
	   trans != blas_conj_trans) ||
	  (diag != blas_non_unit_diag && diag != blas_unit_diag) ||
	  (ldt < n) ||
	  (incx == 0)) {
	BLAS_error(routine_name, 0, 0, NULL);
      } else if (n <= 0) {
	BLAS_error(routine_name, -4, n, NULL);
      } else if (incx == 0) {
	BLAS_error(routine_name, -9, incx, NULL);
      }
      if (trans == blas_no_trans) {
	if (uplo == blas_upper) {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = ldt;
	    inc_tij = -1;
	  } else {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  }
	} else {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  } else {
	    inc_ti = -1;
	    inc_tij = ldt;
	  }
	}
      } else {
	if (uplo == blas_upper) {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -1;
	    inc_tij = ldt;
	  } else {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  }
	} else {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  } else {
	    inc_ti = ldt;
	    inc_tij = -1;
	  }
	}
      }



      inc_x *= 2;

      xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x);
      if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
	xj = xj0;
	for (j = 0; j < n; j++) {
	  x_i[xj] = 0.0;
	  x_i[xj + 1] = 0.0;
	  xj += inc_x;
	}
      } else {

	if (diag == blas_unit_diag) {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < (n - 1); j++) {

	      t_elem = T_i[tij];

	      x_elem[0] = x_i[xj];
	      x_elem[1] = x_i[xj + 1];
	      {
		head_prod[0] = (double)x_elem[0] * t_elem;
		tail_prod[0] = 0.0;
		head_prod[1] = (double)x_elem[1] * t_elem;
		tail_prod[1] = 0.0;
	      }
	      {
		double		head_t , tail_t;
		double		head_a , tail_a;
		double		head_b , tail_b;
		/* Real part */
		head_a = head_sum[0];
		tail_a = tail_sum[0];
		head_b = head_prod[0];
		tail_b = tail_prod[0];
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[0] = head_t;
		tail_sum[0] = tail_t;
		/* Imaginary part */
		head_a = head_sum[1];
		tail_a = tail_sum[1];
		head_b = head_prod[1];
		tail_b = tail_prod[1];
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[1] = head_t;
		tail_sum[1] = tail_t;
	      }

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    x_elem[0] = x_i[xj];
	    x_elem[1] = x_i[xj + 1];
	    {
	      double	      cd [2];
	      cd[0] = (double)x_elem[0];
	      cd[1] = (double)x_elem[1];
	      {
		double		head_t , tail_t;
		double		head_a , tail_a;
		head_a = head_sum[0];
		tail_a = tail_sum[0];
		{
		  /* Compute double-double = double-double + double. */
		  double	  e     , t1, t2;

		  /* Knuth trick. */
		  t1 = head_a + cd[0];
		  e = t1 - head_a;
		  t2 = ((cd[0] - e) + (head_a - (t1 - e))) + tail_a;

		  /* The result is t1 + t2, after normalization. */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[0] = head_t;
		tail_sum[0] = tail_t;
		head_a = head_sum[1];
		tail_a = tail_sum[1];
		{
		  /* Compute double-double = double-double + double. */
		  double	  e     , t1, t2;

		  /* Knuth trick. */
		  t1 = head_a + cd[1];
		  e = t1 - head_a;
		  t2 = ((cd[1] - e) + (head_a - (t1 - e))) + tail_a;

		  /* The result is t1 + t2, after normalization. */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[1] = head_t;
		tail_sum[1] = tail_t;
	      }
	    }

	    if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	      x_i[xj] = head_sum[0];
	      x_i[xj + 1] = head_sum[1];
	    } else {
	      {
		double		cd      [2];
		cd[0] = (double)alpha_i[0];
		cd[1] = (double)alpha_i[1];
		{
		  /* Compute complex-extra = complex-extra * complex-double. */
		  double	  head_a0, tail_a0;
		  double	  head_a1, tail_a1;
		  double	  head_t1, tail_t1;
		  double	  head_t2, tail_t2;
		  head_a0 = head_sum[0];
		  tail_a0 = tail_sum[0];
		  head_a1 = head_sum[1];
		  tail_a1 = tail_sum[1];
		  /* real part */
		  {
		    /* Compute double-double = double-double * double. */
		    double	    a11 , a21, b1, b2, c11, c21, c2, con, t1,
		    		    t2;

		    con = head_a0 * split;
		    a11 = con - head_a0;
		    a11 = con - a11;
		    a21 = head_a0 - a11;
		    con = cd[0] * split;
		    b1 = con - cd[0];
		    b1 = con - b1;
		    b2 = cd[0] - b1;

		    c11 = head_a0 * cd[0];
		    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		    c2 = tail_a0 * cd[0];
		    t1 = c11 + c2;
		    t2 = (c2 - (t1 - c11)) + c21;

		    head_t1 = t1 + t2;
		    tail_t1 = t2 - (head_t1 - t1);
		  }
		  {
		    /* Compute double-double = double-double * double. */
		    double	    a11 , a21, b1, b2, c11, c21, c2, con, t1,
		    		    t2;

		    con = head_a1 * split;
		    a11 = con - head_a1;
		    a11 = con - a11;
		    a21 = head_a1 - a11;
		    con = cd[1] * split;
		    b1 = con - cd[1];
		    b1 = con - b1;
		    b2 = cd[1] - b1;

		    c11 = head_a1 * cd[1];
		    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		    c2 = tail_a1 * cd[1];
		    t1 = c11 + c2;
		    t2 = (c2 - (t1 - c11)) + c21;

		    head_t2 = t1 + t2;
		    tail_t2 = t2 - (head_t2 - t1);
		  }
		  head_t2 = -head_t2;
		  tail_t2 = -tail_t2;
		  {
		    /* Compute double-double = double-double + double-double. */
		    double	    bv;
		    double	    s1  , s2, t1, t2;

		    /* Add two hi words. */
		    s1 = head_t1 + head_t2;
		    bv = s1 - head_t1;
		    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

		    /* Add two lo words. */
		    t1 = tail_t1 + tail_t2;
		    bv = t1 - tail_t1;
		    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

		    s2 += t1;

		    /* Renormalize (s1, s2)  to  (t1, s2) */
		    t1 = s1 + s2;
		    s2 = s2 - (t1 - s1);

		    t2 += s2;

		    /* Renormalize (t1, t2)  */
		    head_t1 = t1 + t2;
		    tail_t1 = t2 - (head_t1 - t1);
		  }
		  head_tmp[0] = head_t1;
		  tail_tmp[0] = tail_t1;
		  /* imaginary part */
		  {
		    /* Compute double-double = double-double * double. */
		    double	    a11 , a21, b1, b2, c11, c21, c2, con, t1,
		    		    t2;

		    con = head_a1 * split;
		    a11 = con - head_a1;
		    a11 = con - a11;
		    a21 = head_a1 - a11;
		    con = cd[0] * split;
		    b1 = con - cd[0];
		    b1 = con - b1;
		    b2 = cd[0] - b1;

		    c11 = head_a1 * cd[0];
		    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		    c2 = tail_a1 * cd[0];
		    t1 = c11 + c2;
		    t2 = (c2 - (t1 - c11)) + c21;

		    head_t1 = t1 + t2;
		    tail_t1 = t2 - (head_t1 - t1);
		  }
		  {
		    /* Compute double-double = double-double * double. */
		    double	    a11 , a21, b1, b2, c11, c21, c2, con, t1,
		    		    t2;

		    con = head_a0 * split;
		    a11 = con - head_a0;
		    a11 = con - a11;
		    a21 = head_a0 - a11;
		    con = cd[1] * split;
		    b1 = con - cd[1];
		    b1 = con - b1;
		    b2 = cd[1] - b1;

		    c11 = head_a0 * cd[1];
		    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		    c2 = tail_a0 * cd[1];
		    t1 = c11 + c2;
		    t2 = (c2 - (t1 - c11)) + c21;

		    head_t2 = t1 + t2;
		    tail_t2 = t2 - (head_t2 - t1);
		  }
		  {
		    /* Compute double-double = double-double + double-double. */
		    double	    bv;
		    double	    s1  , s2, t1, t2;

		    /* Add two hi words. */
		    s1 = head_t1 + head_t2;
		    bv = s1 - head_t1;
		    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

		    /* Add two lo words. */
		    t1 = tail_t1 + tail_t2;
		    bv = t1 - tail_t1;
		    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

		    s2 += t1;

		    /* Renormalize (s1, s2)  to  (t1, s2) */
		    t1 = s1 + s2;
		    s2 = s2 - (t1 - s1);

		    t2 += s2;

		    /* Renormalize (t1, t2)  */
		    head_t1 = t1 + t2;
		    tail_t1 = t2 - (head_t1 - t1);
		  }
		  head_tmp[1] = head_t1;
		  tail_tmp[1] = tail_t1;
		}

	      }
	      x_i[xj] = head_tmp[0];
	      x_i[xj + 1] = head_tmp[1];
	    }

	    ti += inc_ti;
	  }

	} else {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    head_sum[0] = head_sum[1] = tail_sum[0] = tail_sum[1] = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < n; j++) {

	      t_elem = T_i[tij];

	      x_elem[0] = x_i[xj];
	      x_elem[1] = x_i[xj + 1];
	      {
		head_prod[0] = (double)x_elem[0] * t_elem;
		tail_prod[0] = 0.0;
		head_prod[1] = (double)x_elem[1] * t_elem;
		tail_prod[1] = 0.0;
	      }
	      {
		double		head_t , tail_t;
		double		head_a , tail_a;
		double		head_b , tail_b;
		/* Real part */
		head_a = head_sum[0];
		tail_a = tail_sum[0];
		head_b = head_prod[0];
		tail_b = tail_prod[0];
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[0] = head_t;
		tail_sum[0] = tail_t;
		/* Imaginary part */
		head_a = head_sum[1];
		tail_a = tail_sum[1];
		head_b = head_prod[1];
		tail_b = tail_prod[1];
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_a + head_b;
		  bv = s1 - head_a;
		  s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_a + tail_b;
		  bv = t1 - tail_a;
		  t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_t = t1 + t2;
		  tail_t = t2 - (head_t - t1);
		}
		head_sum[1] = head_t;
		tail_sum[1] = tail_t;
	      }

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	      x_i[xj - inc_x] = head_sum[0];
	      x_i[xj - inc_x + 1] = head_sum[1];
	    } else {
	      {
		double		cd      [2];
		cd[0] = (double)alpha_i[0];
		cd[1] = (double)alpha_i[1];
		{
		  /* Compute complex-extra = complex-extra * complex-double. */
		  double	  head_a0, tail_a0;
		  double	  head_a1, tail_a1;
		  double	  head_t1, tail_t1;
		  double	  head_t2, tail_t2;
		  head_a0 = head_sum[0];
		  tail_a0 = tail_sum[0];
		  head_a1 = head_sum[1];
		  tail_a1 = tail_sum[1];
		  /* real part */
		  {
		    /* Compute double-double = double-double * double. */
		    double	    a11 , a21, b1, b2, c11, c21, c2, con, t1,
		    		    t2;

		    con = head_a0 * split;
		    a11 = con - head_a0;
		    a11 = con - a11;
		    a21 = head_a0 - a11;
		    con = cd[0] * split;
		    b1 = con - cd[0];
		    b1 = con - b1;
		    b2 = cd[0] - b1;

		    c11 = head_a0 * cd[0];
		    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		    c2 = tail_a0 * cd[0];
		    t1 = c11 + c2;
		    t2 = (c2 - (t1 - c11)) + c21;

		    head_t1 = t1 + t2;
		    tail_t1 = t2 - (head_t1 - t1);
		  }
		  {
		    /* Compute double-double = double-double * double. */
		    double	    a11 , a21, b1, b2, c11, c21, c2, con, t1,
		    		    t2;

		    con = head_a1 * split;
		    a11 = con - head_a1;
		    a11 = con - a11;
		    a21 = head_a1 - a11;
		    con = cd[1] * split;
		    b1 = con - cd[1];
		    b1 = con - b1;
		    b2 = cd[1] - b1;

		    c11 = head_a1 * cd[1];
		    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		    c2 = tail_a1 * cd[1];
		    t1 = c11 + c2;
		    t2 = (c2 - (t1 - c11)) + c21;

		    head_t2 = t1 + t2;
		    tail_t2 = t2 - (head_t2 - t1);
		  }
		  head_t2 = -head_t2;
		  tail_t2 = -tail_t2;
		  {
		    /* Compute double-double = double-double + double-double. */
		    double	    bv;
		    double	    s1  , s2, t1, t2;

		    /* Add two hi words. */
		    s1 = head_t1 + head_t2;
		    bv = s1 - head_t1;
		    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

		    /* Add two lo words. */
		    t1 = tail_t1 + tail_t2;
		    bv = t1 - tail_t1;
		    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

		    s2 += t1;

		    /* Renormalize (s1, s2)  to  (t1, s2) */
		    t1 = s1 + s2;
		    s2 = s2 - (t1 - s1);

		    t2 += s2;

		    /* Renormalize (t1, t2)  */
		    head_t1 = t1 + t2;
		    tail_t1 = t2 - (head_t1 - t1);
		  }
		  head_tmp[0] = head_t1;
		  tail_tmp[0] = tail_t1;
		  /* imaginary part */
		  {
		    /* Compute double-double = double-double * double. */
		    double	    a11 , a21, b1, b2, c11, c21, c2, con, t1,
		    		    t2;

		    con = head_a1 * split;
		    a11 = con - head_a1;
		    a11 = con - a11;
		    a21 = head_a1 - a11;
		    con = cd[0] * split;
		    b1 = con - cd[0];
		    b1 = con - b1;
		    b2 = cd[0] - b1;

		    c11 = head_a1 * cd[0];
		    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		    c2 = tail_a1 * cd[0];
		    t1 = c11 + c2;
		    t2 = (c2 - (t1 - c11)) + c21;

		    head_t1 = t1 + t2;
		    tail_t1 = t2 - (head_t1 - t1);
		  }
		  {
		    /* Compute double-double = double-double * double. */
		    double	    a11 , a21, b1, b2, c11, c21, c2, con, t1,
		    		    t2;

		    con = head_a0 * split;
		    a11 = con - head_a0;
		    a11 = con - a11;
		    a21 = head_a0 - a11;
		    con = cd[1] * split;
		    b1 = con - cd[1];
		    b1 = con - b1;
		    b2 = cd[1] - b1;

		    c11 = head_a0 * cd[1];
		    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		    c2 = tail_a0 * cd[1];
		    t1 = c11 + c2;
		    t2 = (c2 - (t1 - c11)) + c21;

		    head_t2 = t1 + t2;
		    tail_t2 = t2 - (head_t2 - t1);
		  }
		  {
		    /* Compute double-double = double-double + double-double. */
		    double	    bv;
		    double	    s1  , s2, t1, t2;

		    /* Add two hi words. */
		    s1 = head_t1 + head_t2;
		    bv = s1 - head_t1;
		    s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

		    /* Add two lo words. */
		    t1 = tail_t1 + tail_t2;
		    bv = t1 - tail_t1;
		    t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

		    s2 += t1;

		    /* Renormalize (s1, s2)  to  (t1, s2) */
		    t1 = s1 + s2;
		    s2 = s2 - (t1 - s1);

		    t2 += s2;

		    /* Renormalize (t1, t2)  */
		    head_t1 = t1 + t2;
		    tail_t1 = t2 - (head_t1 - t1);
		  }
		  head_tmp[1] = head_t1;
		  tail_tmp[1] = tail_t1;
		}

	      }
	      x_i[xj - inc_x] = head_tmp[0];
	      x_i[xj - inc_x + 1] = head_tmp[1];
	    }

	    ti += inc_ti;
	  }

	}

      }

      FPU_FIX_STOP;

      break;
    }
  }
}
Exemplo n.º 15
0
void BLAS_chemv2_c_s(enum blas_order_type order, enum blas_uplo_type uplo,
		     int n, const void *alpha, const void *a, int lda,
		     const float *x_head, const float *x_tail, int incx,
		     const void *beta, const float *y, int incy)

/* 
 * Purpose
 * =======
 *
 * This routines computes the matrix product:
 *
 *     y  <-  alpha * A * (x_head + x_tail) + beta * y
 * 
 * where A is a complex Hermitian matrix.
 *
 * Arguments
 * =========
 *
 * order   (input) enum blas_order_type
 *         Storage format of input symmetric matrix A.
 * 
 * uplo    (input) enum blas_uplo_type
 *         Determines which half of matrix A (upper or lower triangle)
 *         is accessed.
 *
 * n       (input) int
 *         Dimension of A and size of vectors x, y.
 *
 * alpha   (input) const void*
 * 
 * a       (input) void*
 *         Matrix A.
 *
 * lda     (input) int
 *         Leading dimension of matrix A.
 *
 * x_head  (input) float*
 *         Vector x_head
 *
 * x_tail  (input) float*
 *         Vector x_tail
 *   
 * incx    (input) int
 *         Stride for vector x.
 *
 * beta    (input) const void*
 * 
 * y       (input) float*
 *         Vector y.
 *
 * incy    (input) int
 *         Stride for vector y.
 *
 */
{
  /* Routine name */
  const char routine_name[] = "BLAS_chemv2_c_s";

  int i, j;
  int xi, yi, xi0, yi0;
  int aij, ai;
  int incai;
  int incaij, incaij2;

  const float *a_i = (float *) a;
  const float *x_head_i = x_head;
  const float *x_tail_i = x_tail;
  float *y_i = (float *) y;
  float *alpha_i = (float *) alpha;
  float *beta_i = (float *) beta;
  float a_elem[2];
  float x_elem;
  float y_elem[2];
  float diag_elem;
  float prod1[2];
  float prod2[2];
  float sum1[2];
  float sum2[2];
  float tmp1[2];
  float tmp2[2];
  float tmp3[2];



  /* Test for no-op */
  if (n <= 0) {
    return;
  }
  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
      && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
    return;
  }

  /* Check for error conditions. */
  if (n < 0) {
    BLAS_error(routine_name, -3, n, NULL);
  }
  if (lda < n) {
    BLAS_error(routine_name, -6, n, NULL);
  }
  if (incx == 0) {
    BLAS_error(routine_name, -9, incx, NULL);
  }
  if (incy == 0) {
    BLAS_error(routine_name, -12, incy, NULL);
  }

  if ((order == blas_colmajor && uplo == blas_upper) ||
      (order == blas_rowmajor && uplo == blas_lower)) {
    incai = lda;
    incaij = 1;
    incaij2 = lda;
  } else {
    incai = 1;
    incaij = lda;
    incaij2 = 1;
  }


  incy *= 2;
  incai *= 2;
  incaij *= 2;
  incaij2 *= 2;
  xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
  yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);



  /* The most general form,   y <--- alpha * A * (x_head + x_tail) + beta * y   */
  if (uplo == blas_lower) {
    for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
      sum1[0] = sum1[1] = 0.0;
      sum2[0] = sum2[1] = 0.0;
      for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
	a_elem[0] = a_i[aij];
	a_elem[1] = a_i[aij + 1];
	x_elem = x_head_i[xi];
	{
	  prod1[0] = a_elem[0] * x_elem;
	  prod1[1] = a_elem[1] * x_elem;
	}
	sum1[0] = sum1[0] + prod1[0];
	sum1[1] = sum1[1] + prod1[1];
	x_elem = x_tail_i[xi];
	{
	  prod2[0] = a_elem[0] * x_elem;
	  prod2[1] = a_elem[1] * x_elem;
	}
	sum2[0] = sum2[0] + prod2[0];
	sum2[1] = sum2[1] + prod2[1];
      }

      diag_elem = a_i[aij];
      x_elem = x_head_i[xi];
      prod1[0] = x_elem * diag_elem;
      prod1[1] = 0.0;
      sum1[0] = sum1[0] + prod1[0];
      sum1[1] = sum1[1] + prod1[1];
      x_elem = x_tail_i[xi];
      prod2[0] = x_elem * diag_elem;
      prod2[1] = 0.0;
      sum2[0] = sum2[0] + prod2[0];
      sum2[1] = sum2[1] + prod2[1];
      j++;
      aij += incaij2;
      xi += incx;

      for (; j < n; j++, aij += incaij2, xi += incx) {
	a_elem[0] = a_i[aij];
	a_elem[1] = a_i[aij + 1];
	a_elem[1] = -a_elem[1];
	x_elem = x_head_i[xi];
	{
	  prod1[0] = a_elem[0] * x_elem;
	  prod1[1] = a_elem[1] * x_elem;
	}
	sum1[0] = sum1[0] + prod1[0];
	sum1[1] = sum1[1] + prod1[1];
	x_elem = x_tail_i[xi];
	{
	  prod2[0] = a_elem[0] * x_elem;
	  prod2[1] = a_elem[1] * x_elem;
	}
	sum2[0] = sum2[0] + prod2[0];
	sum2[1] = sum2[1] + prod2[1];
      }
      sum1[0] = sum1[0] + sum2[0];
      sum1[1] = sum1[1] + sum2[1];
      {
	tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
	tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
      }

      y_elem[0] = y_i[yi];
      y_elem[1] = y_i[yi + 1];
      {
	tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
	tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
      }

      tmp3[0] = tmp1[0] + tmp2[0];
      tmp3[1] = tmp1[1] + tmp2[1];
      y_i[yi] = tmp3[0];
      y_i[yi + 1] = tmp3[1];
    }
  } else {
    /* uplo == blas_upper */
    for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
      sum1[0] = sum1[1] = 0.0;
      sum2[0] = sum2[1] = 0.0;

      for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
	a_elem[0] = a_i[aij];
	a_elem[1] = a_i[aij + 1];
	a_elem[1] = -a_elem[1];
	x_elem = x_head_i[xi];
	{
	  prod1[0] = a_elem[0] * x_elem;
	  prod1[1] = a_elem[1] * x_elem;
	}
	sum1[0] = sum1[0] + prod1[0];
	sum1[1] = sum1[1] + prod1[1];
	x_elem = x_tail_i[xi];
	{
	  prod2[0] = a_elem[0] * x_elem;
	  prod2[1] = a_elem[1] * x_elem;
	}
	sum2[0] = sum2[0] + prod2[0];
	sum2[1] = sum2[1] + prod2[1];
      }

      diag_elem = a_i[aij];
      x_elem = x_head_i[xi];
      prod1[0] = x_elem * diag_elem;
      prod1[1] = 0.0;
      sum1[0] = sum1[0] + prod1[0];
      sum1[1] = sum1[1] + prod1[1];
      x_elem = x_tail_i[xi];
      prod2[0] = x_elem * diag_elem;
      prod2[1] = 0.0;
      sum2[0] = sum2[0] + prod2[0];
      sum2[1] = sum2[1] + prod2[1];
      j++;
      aij += incaij2;
      xi += incx;

      for (; j < n; j++, aij += incaij2, xi += incx) {
	a_elem[0] = a_i[aij];
	a_elem[1] = a_i[aij + 1];
	x_elem = x_head_i[xi];
	{
	  prod1[0] = a_elem[0] * x_elem;
	  prod1[1] = a_elem[1] * x_elem;
	}
	sum1[0] = sum1[0] + prod1[0];
	sum1[1] = sum1[1] + prod1[1];
	x_elem = x_tail_i[xi];
	{
	  prod2[0] = a_elem[0] * x_elem;
	  prod2[1] = a_elem[1] * x_elem;
	}
	sum2[0] = sum2[0] + prod2[0];
	sum2[1] = sum2[1] + prod2[1];
      }
      sum1[0] = sum1[0] + sum2[0];
      sum1[1] = sum1[1] + sum2[1];
      {
	tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
	tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
      }

      y_elem[0] = y_i[yi];
      y_elem[1] = y_i[yi + 1];
      {
	tmp2[0] = y_elem[0] * beta_i[0] - y_elem[1] * beta_i[1];
	tmp2[1] = y_elem[0] * beta_i[1] + y_elem[1] * beta_i[0];
      }

      tmp3[0] = tmp1[0] + tmp2[0];
      tmp3[1] = tmp1[1] + tmp2[1];
      y_i[yi] = tmp3[0];
      y_i[yi + 1] = tmp3[1];
    }
  }



}				/* end BLAS_chemv2_c_s */
Exemplo n.º 16
0
void BLAS_ztrmv_c_testgen(int norm, enum blas_order_type order,
			  enum blas_uplo_type uplo,
			  enum blas_trans_type trans,
			  enum blas_diag_type diag, int n, void *alpha,
			  int alpha_flag, void *T, int ldt, void *x,
			  int *seed, double *head_r_true, double *tail_r_true)

/*
 * Purpose
 * =======
 *
 * Generates alpha, T and x, where T is a triangular matrix; and 
 * computes r_true.
 *
 * Arguments
 * =========
 *
 * norm         (input) blas_norm_type 
 *
 * order        (input) blas_order_type
 *              Order of T; row or column major
 *
 * uplo         (input) blas_uplo_type
 *              Whether T is upper or lower
 *
 * trans        (input) blas_trans_type
 *              No trans, trans, conj trans
 *
 * diag         (input) blas_diag_type
 *              non unit, unit
 *
 * n            (input) int
 *              Dimension of AP and the length of vector x
 *
 * alpha        (input/output) void*
 *              If alpha_flag = 1, alpha is input.
 *              If alpha_flag = 0, alpha is output.
 *
 * alpha_flag   (input) int
 *              = 0 : alpha is free, and is output.
 *              = 1 : alpha is fixed on input.
 *              
 * T            (output) void*
 *
 * x            (input/output) void*
 *
 * seed         (input/output) int
 *
 * head_r_true     (output) double*
 *              The leading part of the truth in double-double.
 *
 * tail_r_true     (output) double*
 *              The trailing part of the truth in double-double.
 *
 */
{
  double *x_i = (double *) x;
  float *T_i = (float *) T;
  double *alpha_i = (double *) alpha;
  double *x_vec;
  float *t_vec;
  double beta[2];
  double r[2];
  double head_r_true_elem[2], tail_r_true_elem[2];
  double x_elem[2];
  float t_elem[2];

  int inc_tvec = 1, inc_xvec = 1;
  int xvec_i, tvec_j;
  int xi;
  int ti, tij;
  int inc_ti, inc_tij;
  int inc_xi;
  int i, j;

  r[0] = r[1] = 0.0;
  beta[0] = beta[1] = 0.0;

  inc_tvec *= 2;
  inc_xvec *= 2;

  t_vec = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && t_vec == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  };
  x_vec = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && x_vec == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  };

  if (trans == blas_no_trans) {
    if (uplo == blas_upper) {
      inc_xi = -1;
      if (order == blas_rowmajor) {
	inc_ti = -ldt;
	inc_tij = -1;
      } else {
	inc_ti = -1;
	inc_tij = -ldt;
      }
    } else {
      inc_xi = 1;
      if (order == blas_rowmajor) {
	inc_ti = ldt;
	inc_tij = 1;
      } else {
	inc_ti = 1;
	inc_tij = ldt;
      }
    }
  } else {
    if (uplo == blas_upper) {
      inc_xi = 1;
      if (order == blas_rowmajor) {
	inc_ti = 1;
	inc_tij = ldt;
      } else {
	inc_ti = ldt;
	inc_tij = 1;
      }
    } else {
      inc_xi = -1;
      if (order == blas_rowmajor) {
	inc_ti = -1;
	inc_tij = -ldt;
      } else {
	inc_ti = -ldt;
	inc_tij = -1;
      }
    }
  }

  inc_xi *= 2;

  inc_ti *= 2;
  inc_tij *= 2;

  /* Call dot_testgen n times.  Each call will generate
   * one row of T and one element of x.                   */
  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
  xi = (inc_xi > 0 ? 0 : -(n - 1) * inc_xi);
  xvec_i = 0;
  for (i = 0; i < n; i++) {

    /* Generate the i-th element of x_vec and all of t_vec. */
    if (diag == blas_unit_diag) {
      /* Since we need alpha = beta, we fix alpha if alpha_flag = 0. */
      if (i == 0 && alpha_flag == 0) {
	alpha_i[0] = xrand(seed);
	alpha_i[1] = xrand(seed);
      }
      BLAS_zdot_z_c_testgen(i, 0, i, norm, blas_no_conj, alpha_i,
			    1, alpha_i, 1, x_vec, t_vec,
			    seed, r, head_r_true_elem, tail_r_true_elem);

      /* Copy generated t_vec to T. */
      tvec_j = 0;
      tij = (inc_tij > 0 ? ti : ti - (n - 1) * inc_tij);
      for (j = 0; j < i; j++) {
	t_elem[0] = t_vec[tvec_j];
	t_elem[1] = t_vec[tvec_j + 1];

	if (trans == blas_conj_trans) {
	  t_elem[1] = -t_elem[1];
	}
	T_i[tij] = t_elem[0];
	T_i[tij + 1] = t_elem[1];
	tvec_j += inc_tvec;
	tij += inc_tij;
      }

      /* Set the diagonal element to 1. */
      t_elem[0] = 1.0;
      t_elem[1] = 0.0;
      T_i[tij] = t_elem[0];
      T_i[tij + 1] = t_elem[1];

      /* Set x[i] to be r. */
      x_i[xi] = r[0];
      x_i[xi + 1] = r[1];
      x_vec[xvec_i] = r[0];
      x_vec[xvec_i + 1] = r[1];

    } else {
      BLAS_zdot_z_c_testgen(i + 1, 0, i, norm, blas_no_conj, alpha,
			    (i == 0 ? alpha_flag : 1), beta, 1, x_vec, t_vec,
			    seed, r, head_r_true_elem, tail_r_true_elem);

      /* Copy generated t_vec to T. */
      tvec_j = 0;
      tij = (inc_tij > 0 ? ti : ti - (n - 1) * inc_tij);
      for (j = 0; j <= i; j++) {
	t_elem[0] = t_vec[tvec_j];
	t_elem[1] = t_vec[tvec_j + 1];

	if (trans == blas_conj_trans) {
	  t_elem[1] = -t_elem[1];
	}
	T_i[tij] = t_elem[0];
	T_i[tij + 1] = t_elem[1];
	tvec_j += inc_tvec;
	tij += inc_tij;
      }

      /* Copy generated x_vec[i] to appropriate position in x. */
      x_elem[0] = x_vec[xvec_i];
      x_elem[1] = x_vec[xvec_i + 1];
      x_i[xi] = x_elem[0];
      x_i[xi + 1] = x_elem[1];
    }

    /* Copy r_true */
    head_r_true[xi] = head_r_true_elem[0];
    head_r_true[xi + 1] = head_r_true_elem[1];
    tail_r_true[xi] = tail_r_true_elem[0];
    tail_r_true[xi + 1] = tail_r_true_elem[1];

    xvec_i += inc_xvec;
    xi += inc_xi;

    ti += inc_ti;
  }

  blas_free(x_vec);
  blas_free(t_vec);
}
Exemplo n.º 17
0
void BLAS_dgbmv_s_d_x(enum blas_order_type order, enum blas_trans_type trans,
		      int m, int n, int kl, int ku, double alpha,
		      const float *a, int lda, const double *x, int incx,
		      double beta, double *y, int incy,
		      enum blas_prec_type prec)

/*           
 * Purpose
 * =======
 *
 *  gbmv computes y = alpha * A * x + beta * y, where 
 *
 *  A is a m x n banded matrix
 *  x is a n x 1 vector
 *  y is a m x 1 vector
 *  alpha and beta are scalars 
 *
 *   
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of AP; row or column major
 *
 * trans        (input) blas_trans_type
 *              Transpose of AB; no trans, 
 *              trans, or conjugate trans
 *
 * m            (input) int
 *              Dimension of AB
 *
 * n            (input) int
 *              Dimension of AB and the length of vector x
 *
 * kl           (input) int 
 *              Number of lower diagnols of AB
 *
 * ku           (input) int
 *              Number of upper diagnols of AB
 *
 * alpha        (input) double
 *              
 * AB           (input) float*
 *
 * lda          (input) int 
 *              Leading dimension of AB
 *              lda >= ku + kl + 1
 *
 * x            (input) double*
 * 
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) double
 *
 * y            (input/output) double*
 *
 * incy         (input) int
 *              The stride for vector y.
 * 
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 *
 * LOCAL VARIABLES 
 * ===============
 * 
 *  As an example, these variables are described on the mxn, column 
 *  major, banded matrix described in section 2.2.3 of the specification  
 *
 *  astart      indexes first element in A where computation begins
 *
 *  incai1      indexes first element in row where row is less than lbound
 * 
 *  incai2      indexes first element in row where row exceeds lbound
 *   
 *  lbound      denotes the number of rows before  first element shifts 
 *
 *  rbound      denotes the columns where there is blank space
 *   
 *  ra          index of the rightmost element for a given row
 *  
 *  la          index of leftmost  elements for a given row
 *
 *  ra - la     width of a row
 *
 *                        rbound 
 *            la   ra    ____|_____ 
 *             |    |   |          |
 *         |  a00  a01   *    *   *
 * lbound -|  a10  a11  a12   *   *
 *         |  a20  a21  a22  a23  *
 *             *   a31  a32  a33 a34
 *             *    *   a42  a43 a44
 *
 *  Varations on order and transpose have been implemented by modifying these
 *  local variables. 
 *
 */
{
  static const char routine_name[] = "BLAS_dgbmv_s_d_x";

  switch (prec) {
  case blas_prec_single:
  case blas_prec_double:
  case blas_prec_indigenous:
    {
      int ky, iy, kx, jx, j, i, rbound, lbound, ra, la, lenx, leny;
      int incaij, aij, incai1, incai2, astart, ai;
      double *y_i = y;
      const float *a_i = a;
      const double *x_i = x;
      double alpha_i = alpha;
      double beta_i = beta;
      double tmp1;
      double tmp2;
      double result;
      double sum;
      double prod;
      float a_elem;
      double x_elem;
      double y_elem;


      if (order != blas_colmajor && order != blas_rowmajor)
	BLAS_error(routine_name, -1, order, NULL);
      if (trans != blas_no_trans &&
	  trans != blas_trans && trans != blas_conj_trans) {
	BLAS_error(routine_name, -2, trans, NULL);
      }
      if (m < 0)
	BLAS_error(routine_name, -3, m, NULL);
      if (n < 0)
	BLAS_error(routine_name, -4, n, NULL);
      if (kl < 0 || kl >= m)
	BLAS_error(routine_name, -5, kl, NULL);
      if (ku < 0 || ku >= n)
	BLAS_error(routine_name, -6, ku, NULL);
      if (lda < kl + ku + 1)
	BLAS_error(routine_name, -9, lda, NULL);
      if (incx == 0)
	BLAS_error(routine_name, -11, incx, NULL);
      if (incy == 0)
	BLAS_error(routine_name, -14, incy, NULL);

      if ((m == 0) || (n == 0) || (((alpha_i == 0.0) && (beta_i == 1.0))))
	return;

      if (trans == blas_no_trans) {
	lenx = n;
	leny = m;
      } else {			/* change back */
	lenx = m;
	leny = n;
      }

      if (incx < 0) {
	kx = -(lenx - 1) * incx;
      } else {
	kx = 0;
      }

      if (incy < 0) {
	ky = -(leny - 1) * incy;
      } else {
	ky = 0;
      }



      /* if alpha = 0, return y = y*beta */
      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
	astart = ku;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
	astart = ku;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
	astart = kl;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else {			/* rowmajor and blas_trans */
	astart = kl;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      }









      la = 0;
      ai = astart;
      iy = ky;
      for (i = 0; i < leny; i++) {
	sum = 0.0;
	aij = ai;
	jx = kx;

	for (j = ra - la; j >= 0; j--) {
	  x_elem = x_i[jx];
	  a_elem = a_i[aij];
	  prod = x_elem * a_elem;
	  sum = sum + prod;
	  aij += incaij;
	  jx += incx;
	}


	tmp1 = sum * alpha_i;
	y_elem = y_i[iy];
	tmp2 = beta_i * y_elem;
	result = tmp1 + tmp2;
	y_i[iy] = result;
	iy += incy;
	if (i >= lbound) {
	  kx += incx;
	  ai += incai2;
	  la++;
	} else {
	  ai += incai1;
	}
	if (i < rbound) {
	  ra++;
	}
      }


    }
    break;
  case blas_prec_extra:
    {
      int ky, iy, kx, jx, j, i, rbound, lbound, ra, la, lenx, leny;
      int incaij, aij, incai1, incai2, astart, ai;
      double *y_i = y;
      const float *a_i = a;
      const double *x_i = x;
      double alpha_i = alpha;
      double beta_i = beta;
      double head_tmp1, tail_tmp1;
      double head_tmp2, tail_tmp2;
      double result;
      double head_sum, tail_sum;
      double head_prod, tail_prod;
      float a_elem;
      double x_elem;
      double y_elem;
      FPU_FIX_DECL;

      if (order != blas_colmajor && order != blas_rowmajor)
	BLAS_error(routine_name, -1, order, NULL);
      if (trans != blas_no_trans &&
	  trans != blas_trans && trans != blas_conj_trans) {
	BLAS_error(routine_name, -2, trans, NULL);
      }
      if (m < 0)
	BLAS_error(routine_name, -3, m, NULL);
      if (n < 0)
	BLAS_error(routine_name, -4, n, NULL);
      if (kl < 0 || kl >= m)
	BLAS_error(routine_name, -5, kl, NULL);
      if (ku < 0 || ku >= n)
	BLAS_error(routine_name, -6, ku, NULL);
      if (lda < kl + ku + 1)
	BLAS_error(routine_name, -9, lda, NULL);
      if (incx == 0)
	BLAS_error(routine_name, -11, incx, NULL);
      if (incy == 0)
	BLAS_error(routine_name, -14, incy, NULL);

      if ((m == 0) || (n == 0) || (((alpha_i == 0.0) && (beta_i == 1.0))))
	return;

      if (trans == blas_no_trans) {
	lenx = n;
	leny = m;
      } else {			/* change back */
	lenx = m;
	leny = n;
      }

      if (incx < 0) {
	kx = -(lenx - 1) * incx;
      } else {
	kx = 0;
      }

      if (incy < 0) {
	ky = -(leny - 1) * incy;
      } else {
	ky = 0;
      }

      FPU_FIX_START;

      /* if alpha = 0, return y = y*beta */
      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
	astart = ku;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
	astart = ku;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
	astart = kl;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else {			/* rowmajor and blas_trans */
	astart = kl;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      }









      la = 0;
      ai = astart;
      iy = ky;
      for (i = 0; i < leny; i++) {
	head_sum = tail_sum = 0.0;
	aij = ai;
	jx = kx;

	for (j = ra - la; j >= 0; j--) {
	  x_elem = x_i[jx];
	  a_elem = a_i[aij];
	  {
	    double dt = (double) a_elem;
	    {
	      /* Compute double_double = double * double. */
	      double a1, a2, b1, b2, con;

	      con = x_elem * split;
	      a1 = con - x_elem;
	      a1 = con - a1;
	      a2 = x_elem - a1;
	      con = dt * split;
	      b1 = con - dt;
	      b1 = con - b1;
	      b2 = dt - b1;

	      head_prod = x_elem * dt;
	      tail_prod =
		(((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	  }
	  {
	    /* Compute double-double = double-double + double-double. */
	    double bv;
	    double s1, s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_sum + head_prod;
	    bv = s1 - head_sum;
	    s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_sum + tail_prod;
	    bv = t1 - tail_sum;
	    t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    head_sum = t1 + t2;
	    tail_sum = t2 - (head_sum - t1);
	  }
	  aij += incaij;
	  jx += incx;
	}


	{
	  /* Compute double-double = double-double * double. */
	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	  con = head_sum * split;
	  a11 = con - head_sum;
	  a11 = con - a11;
	  a21 = head_sum - a11;
	  con = alpha_i * split;
	  b1 = con - alpha_i;
	  b1 = con - b1;
	  b2 = alpha_i - b1;

	  c11 = head_sum * alpha_i;
	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	  c2 = tail_sum * alpha_i;
	  t1 = c11 + c2;
	  t2 = (c2 - (t1 - c11)) + c21;

	  head_tmp1 = t1 + t2;
	  tail_tmp1 = t2 - (head_tmp1 - t1);
	}
	y_elem = y_i[iy];
	{
	  /* Compute double_double = double * double. */
	  double a1, a2, b1, b2, con;

	  con = beta_i * split;
	  a1 = con - beta_i;
	  a1 = con - a1;
	  a2 = beta_i - a1;
	  con = y_elem * split;
	  b1 = con - y_elem;
	  b1 = con - b1;
	  b2 = y_elem - b1;

	  head_tmp2 = beta_i * y_elem;
	  tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
	}
	{
	  /* Compute double-double = double-double + double-double. */
	  double bv;
	  double s1, s2, t1, t2;

	  /* Add two hi words. */
	  s1 = head_tmp1 + head_tmp2;
	  bv = s1 - head_tmp1;
	  s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

	  /* Add two lo words. */
	  t1 = tail_tmp1 + tail_tmp2;
	  bv = t1 - tail_tmp1;
	  t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

	  s2 += t1;

	  /* Renormalize (s1, s2)  to  (t1, s2) */
	  t1 = s1 + s2;
	  s2 = s2 - (t1 - s1);

	  t2 += s2;

	  /* Renormalize (t1, t2)  */
	  result = t1 + t2;
	}
	y_i[iy] = result;
	iy += incy;
	if (i >= lbound) {
	  kx += incx;
	  ai += incai2;
	  la++;
	} else {
	  ai += incai1;
	}
	if (i < rbound) {
	  ra++;
	}
      }

      FPU_FIX_STOP;
    }
    break;
  }
}				/* end GEMV_NAME(d, s, d, _x) */
Exemplo n.º 18
0
void		BLAS_cgbmv2_x(enum blas_order_type order, enum blas_trans_type trans,
		    		int		m       , int n, int kl, int ku, const void *alpha,
	  		const		void  *a, int lda, const void *head_x,
      		const		void  *tail_x, int incx, const void *beta,
	    		void         *y, int incy, enum blas_prec_type prec)
/*
 * Purpose
 * =======
 *
 * This routines computes the matrix product:
 *
 *     y  <-  alpha * op(A) * (x_head + x_tail) + beta * y
 *
 * where
 *
 *  A is a m x n banded matrix
 *  x is a n x 1 vector
 *  y is a m x 1 vector
 *  alpha and beta are scalars
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of AB; row or column major
 *
 * trans        (input) blas_trans_type
 *              Transpose of AB; no trans,
 *              trans, or conjugate trans
 *
 * m            (input) int
 *              Dimension of AB
 *
 * n            (input) int
 *              Dimension of AB and the length of vector x and z
 *
 * kl           (input) int
 *              Number of lower diagnols of AB
 *
 * ku           (input) int
 *              Number of upper diagnols of AB
 *
 * alpha        (input) const void*
 *
 * AB           (input) void*
 *
 * lda          (input) int
 *              Leading dimension of AB
 *              lda >= ku + kl + 1
 *
 * head_x
 * tail_x       (input) void*
 *
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) const void*
 *
 * y            (input) const void*
 *
 * incy         (input) int
 *              The stride for vector y.
 *
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 *
 * LOCAL VARIABLES
 * ===============
 *
 *  As an example, these variables are described on the mxn, column
 *  major, banded matrix described in section 2.2.3 of the specification
 *
 *  astart      indexes first element in A where computation begins
 *
 *  incai1      indexes first element in row where row is less than lbound
 *
 *  incai2      indexes first element in row where row exceeds lbound
 *
 *  lbound      denotes the number of rows before  first element shifts
 *
 *  rbound      denotes the columns where there is blank space
 *
 *  ra          index of the rightmost element for a given row
 *
 *  la          index of leftmost  elements for a given row
 *
 *  ra - la     width of a row
 *
 *                        rbound
 *            la   ra    ____|_____
 *             |    |   |          |
 *         |  a00  a01   *    *   *
 * lbound -|  a10  a11  a12   *   *
 *         |  a20  a21  a22  a23  *
 *             *   a31  a32  a33 a34
 *             *    *   a42  a43 a44
 *
 *  Varations on order and transpose have been implemented by modifying these
 *  local variables.
 *
 */
{
  static const char routine_name[] = "BLAS_cgbmv2_x";

  switch (prec) {
  case blas_prec_single:{

      int	      iy0  , iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx,
      		      leny;
      int	      incaij, aij, incai1, incai2, astart, ai;
      float          *y_i = (float *)y;
      const float    *a_i = (float *)a;
      const float    *head_x_i = (float *)head_x;
      const float    *tail_x_i = (float *)tail_x;
      float          *alpha_i = (float *)alpha;
      float          *beta_i = (float *)beta;
      float	      tmp1[2];
      float	      tmp2[2];
      float	      tmp3[2];
      float	      tmp4[2];
      float	      result[2];
      float	      sum1[2];
      float	      sum2[2];
      float	      prod[2];
      float	      a_elem[2];
      float	      x_elem[2];
      float	      y_elem[2];


      if (order != blas_colmajor && order != blas_rowmajor)
	BLAS_error(routine_name, -1, order, NULL);
      if (trans != blas_no_trans &&
	  trans != blas_trans &&
	  trans != blas_conj_trans) {
	BLAS_error(routine_name, -2, trans, NULL);
      }
      if (m < 0)
	BLAS_error(routine_name, -3, m, NULL);
      if (n < 0)
	BLAS_error(routine_name, -4, n, NULL);
      if (kl < 0 || kl >= m)
	BLAS_error(routine_name, -5, kl, NULL);
      if (ku < 0 || ku >= n)
	BLAS_error(routine_name, -6, ku, NULL);
      if (lda < kl + ku + 1)
	BLAS_error(routine_name, -9, lda, NULL);
      if (incx == 0)
	BLAS_error(routine_name, -12, incx, NULL);
      if (incy == 0)
	BLAS_error(routine_name, -15, incy, NULL);

      if (m == 0 || n == 0)
	return;
      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0) && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
	return;

      if (trans == blas_no_trans) {
	lenx = n;
	leny = m;
      } else {
	lenx = m;
	leny = n;
      }

      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;



      /* if alpha = 0, return y = y*beta */
      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
	astart = ku;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
	astart = ku;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
	astart = kl;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else {			/* rowmajor and blas_trans */
	astart = kl;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      }
      incx *= 2;
      incy *= 2;
      incaij *= 2;
      incai1 *= 2;
      incai2 *= 2;
      astart *= 2;
      iy0 *= 2;
      ix0 *= 2;

      la = 0;
      ai = astart;
      iy = iy0;
      for (i = 0; i < leny; i++) {
	sum1[0] = sum1[1] = 0.0;
	sum2[0] = sum2[1] = 0.0;
	aij = ai;
	jx = ix0;
	if (trans != blas_conj_trans) {
	  for (j = ra - la; j >= 0; j--) {
	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1];
	      prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0];
	    }

	    sum1[0] = sum1[0] + prod[0];
	    sum1[1] = sum1[1] + prod[1];
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1];
	      prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0];
	    }

	    sum2[0] = sum2[0] + prod[0];
	    sum2[1] = sum2[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }

	} else {
	  for (j = ra - la; j >= 0; j--) {
	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    a_elem[1] = -a_elem[1];
	    {
	      prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1];
	      prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0];
	    }

	    sum1[0] = sum1[0] + prod[0];
	    sum1[1] = sum1[1] + prod[1];
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0] - x_elem[1] * a_elem[1];
	      prod[1] = x_elem[0] * a_elem[1] + x_elem[1] * a_elem[0];
	    }

	    sum2[0] = sum2[0] + prod[0];
	    sum2[1] = sum2[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }
	}

	{
	  tmp1[0] = sum1[0] * alpha_i[0] - sum1[1] * alpha_i[1];
	  tmp1[1] = sum1[0] * alpha_i[1] + sum1[1] * alpha_i[0];
	}

	{
	  tmp2[0] = sum2[0] * alpha_i[0] - sum2[1] * alpha_i[1];
	  tmp2[1] = sum2[0] * alpha_i[1] + sum2[1] * alpha_i[0];
	}

	tmp3[0] = tmp1[0] + tmp2[0];
	tmp3[1] = tmp1[1] + tmp2[1];
	y_elem[0] = y_i[iy];
	y_elem[1] = y_i[iy + 1];
	{
	  tmp4[0] = beta_i[0] * y_elem[0] - beta_i[1] * y_elem[1];
	  tmp4[1] = beta_i[0] * y_elem[1] + beta_i[1] * y_elem[0];
	}

	result[0] = tmp4[0] + tmp3[0];
	result[1] = tmp4[1] + tmp3[1];
	y_i[iy] = result[0];
	y_i[iy + 1] = result[1];

	iy += incy;
	if (i >= lbound) {
	  ix0 += incx;
	  ai += incai2;
	  la++;
	} else {
	  ai += incai1;
	}
	if (i < rbound) {
	  ra++;
	}
      }



      break;
    }
  case blas_prec_double:
  case blas_prec_indigenous:
    {
      int	      iy0  , iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx,
      		      leny;
      int	      incaij, aij, incai1, incai2, astart, ai;
      float          *y_i = (float *)y;
      const float    *a_i = (float *)a;
      const float    *head_x_i = (float *)head_x;
      const float    *tail_x_i = (float *)tail_x;
      float          *alpha_i = (float *)alpha;
      float          *beta_i = (float *)beta;
      double	      tmp1[2];
      double	      tmp2[2];
      double	      tmp3[2];
      double	      tmp4[2];
      float	      result[2];
      double	      sum1[2];
      double	      sum2[2];
      double	      prod[2];
      float	      a_elem[2];
      float	      x_elem[2];
      float	      y_elem[2];


      if (order != blas_colmajor && order != blas_rowmajor)
	BLAS_error(routine_name, -1, order, NULL);
      if (trans != blas_no_trans &&
	  trans != blas_trans &&
	  trans != blas_conj_trans) {
	BLAS_error(routine_name, -2, trans, NULL);
      }
      if (m < 0)
	BLAS_error(routine_name, -3, m, NULL);
      if (n < 0)
	BLAS_error(routine_name, -4, n, NULL);
      if (kl < 0 || kl >= m)
	BLAS_error(routine_name, -5, kl, NULL);
      if (ku < 0 || ku >= n)
	BLAS_error(routine_name, -6, ku, NULL);
      if (lda < kl + ku + 1)
	BLAS_error(routine_name, -9, lda, NULL);
      if (incx == 0)
	BLAS_error(routine_name, -12, incx, NULL);
      if (incy == 0)
	BLAS_error(routine_name, -15, incy, NULL);

      if (m == 0 || n == 0)
	return;
      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0) && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
	return;

      if (trans == blas_no_trans) {
	lenx = n;
	leny = m;
      } else {
	lenx = m;
	leny = n;
      }

      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;



      /* if alpha = 0, return y = y*beta */
      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
	astart = ku;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
	astart = ku;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
	astart = kl;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else {			/* rowmajor and blas_trans */
	astart = kl;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      }
      incx *= 2;
      incy *= 2;
      incaij *= 2;
      incai1 *= 2;
      incai2 *= 2;
      astart *= 2;
      iy0 *= 2;
      ix0 *= 2;

      la = 0;
      ai = astart;
      iy = iy0;
      for (i = 0; i < leny; i++) {
	sum1[0] = sum1[1] = 0.0;
	sum2[0] = sum2[1] = 0.0;
	aij = ai;
	jx = ix0;
	if (trans != blas_conj_trans) {
	  for (j = ra - la; j >= 0; j--) {
	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    {
	      prod[0] = (double)x_elem[0] * a_elem[0] - (double)x_elem[1] * a_elem[1];
	      prod[1] = (double)x_elem[0] * a_elem[1] + (double)x_elem[1] * a_elem[0];
	    }
	    sum1[0] = sum1[0] + prod[0];
	    sum1[1] = sum1[1] + prod[1];
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      prod[0] = (double)x_elem[0] * a_elem[0] - (double)x_elem[1] * a_elem[1];
	      prod[1] = (double)x_elem[0] * a_elem[1] + (double)x_elem[1] * a_elem[0];
	    }
	    sum2[0] = sum2[0] + prod[0];
	    sum2[1] = sum2[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }

	} else {
	  for (j = ra - la; j >= 0; j--) {
	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    a_elem[1] = -a_elem[1];
	    {
	      prod[0] = (double)x_elem[0] * a_elem[0] - (double)x_elem[1] * a_elem[1];
	      prod[1] = (double)x_elem[0] * a_elem[1] + (double)x_elem[1] * a_elem[0];
	    }
	    sum1[0] = sum1[0] + prod[0];
	    sum1[1] = sum1[1] + prod[1];
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      prod[0] = (double)x_elem[0] * a_elem[0] - (double)x_elem[1] * a_elem[1];
	      prod[1] = (double)x_elem[0] * a_elem[1] + (double)x_elem[1] * a_elem[0];
	    }
	    sum2[0] = sum2[0] + prod[0];
	    sum2[1] = sum2[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }
	}

	{
	  tmp1[0] = (double)sum1[0] * alpha_i[0] - (double)sum1[1] * alpha_i[1];
	  tmp1[1] = (double)sum1[0] * alpha_i[1] + (double)sum1[1] * alpha_i[0];
	}
	{
	  tmp2[0] = (double)sum2[0] * alpha_i[0] - (double)sum2[1] * alpha_i[1];
	  tmp2[1] = (double)sum2[0] * alpha_i[1] + (double)sum2[1] * alpha_i[0];
	}
	tmp3[0] = tmp1[0] + tmp2[0];
	tmp3[1] = tmp1[1] + tmp2[1];
	y_elem[0] = y_i[iy];
	y_elem[1] = y_i[iy + 1];
	{
	  tmp4[0] = (double)beta_i[0] * y_elem[0] - (double)beta_i[1] * y_elem[1];
	  tmp4[1] = (double)beta_i[0] * y_elem[1] + (double)beta_i[1] * y_elem[0];
	}
	result[0] = tmp4[0] + tmp3[0];
	result[1] = tmp4[1] + tmp3[1];
	y_i[iy] = result[0];
	y_i[iy + 1] = result[1];

	iy += incy;
	if (i >= lbound) {
	  ix0 += incx;
	  ai += incai2;
	  la++;
	} else {
	  ai += incai1;
	}
	if (i < rbound) {
	  ra++;
	}
      }


    }
    break;
  case blas_prec_extra:
    {
      int	      iy0  , iy, ix0, jx, j, i, rbound, lbound, ra, la, lenx,
      		      leny;
      int	      incaij, aij, incai1, incai2, astart, ai;
      float          *y_i = (float *)y;
      const float    *a_i = (float *)a;
      const float    *head_x_i = (float *)head_x;
      const float    *tail_x_i = (float *)tail_x;
      float          *alpha_i = (float *)alpha;
      float          *beta_i = (float *)beta;
      double	      head_tmp1[2], tail_tmp1[2];
      double	      head_tmp2[2], tail_tmp2[2];
      double	      head_tmp3[2], tail_tmp3[2];
      double	      head_tmp4[2], tail_tmp4[2];
      float	      result[2];
      double	      head_sum1[2], tail_sum1[2];
      double	      head_sum2[2], tail_sum2[2];
      double	      head_prod[2], tail_prod[2];
      float	      a_elem[2];
      float	      x_elem[2];
      float	      y_elem[2];
      FPU_FIX_DECL;

      if (order != blas_colmajor && order != blas_rowmajor)
	BLAS_error(routine_name, -1, order, NULL);
      if (trans != blas_no_trans &&
	  trans != blas_trans &&
	  trans != blas_conj_trans) {
	BLAS_error(routine_name, -2, trans, NULL);
      }
      if (m < 0)
	BLAS_error(routine_name, -3, m, NULL);
      if (n < 0)
	BLAS_error(routine_name, -4, n, NULL);
      if (kl < 0 || kl >= m)
	BLAS_error(routine_name, -5, kl, NULL);
      if (ku < 0 || ku >= n)
	BLAS_error(routine_name, -6, ku, NULL);
      if (lda < kl + ku + 1)
	BLAS_error(routine_name, -9, lda, NULL);
      if (incx == 0)
	BLAS_error(routine_name, -12, incx, NULL);
      if (incy == 0)
	BLAS_error(routine_name, -15, incy, NULL);

      if (m == 0 || n == 0)
	return;
      if ((alpha_i[0] == 0.0 && alpha_i[1] == 0.0) && ((beta_i[0] == 1.0 && beta_i[1] == 0.0)))
	return;

      if (trans == blas_no_trans) {
	lenx = n;
	leny = m;
      } else {
	lenx = m;
	leny = n;
      }

      ix0 = (incx > 0) ? 0 : -(lenx - 1) * incx;
      iy0 = (incy > 0) ? 0 : -(leny - 1) * incy;

      FPU_FIX_START;

      /* if alpha = 0, return y = y*beta */
      if ((order == blas_colmajor) && (trans == blas_no_trans)) {
	astart = ku;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else if ((order == blas_colmajor) && (trans != blas_no_trans)) {
	astart = ku;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      } else if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
	astart = kl;
	incai1 = lda - 1;
	incai2 = lda;
	incaij = 1;
	lbound = kl;
	rbound = n - ku - 1;
	ra = ku;
      } else {			/* rowmajor and blas_trans */
	astart = kl;
	incai1 = 1;
	incai2 = lda;
	incaij = lda - 1;
	lbound = ku;
	rbound = m - kl - 1;
	ra = kl;
      }
      incx *= 2;
      incy *= 2;
      incaij *= 2;
      incai1 *= 2;
      incai2 *= 2;
      astart *= 2;
      iy0 *= 2;
      ix0 *= 2;

      la = 0;
      ai = astart;
      iy = iy0;
      for (i = 0; i < leny; i++) {
	head_sum1[0] = head_sum1[1] = tail_sum1[0] = tail_sum1[1] = 0.0;
	head_sum2[0] = head_sum2[1] = tail_sum2[0] = tail_sum2[1] = 0.0;
	aij = ai;
	jx = ix0;
	if (trans != blas_conj_trans) {
	  for (j = ra - la; j >= 0; j--) {
	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    {
	      double	      head_e1, tail_e1;
	      double	      d1;
	      double	      d2;
	      /* Real part */
	      d1 = (double)x_elem[0] * a_elem[0];
	      d2 = (double)-x_elem[1] * a_elem[1];
	      {
		/* Compute double-double = double + double. */
		double		e      , t1, t2;

		/* Knuth trick. */
		t1 = d1 + d2;
		e = t1 - d1;
		t2 = ((d2 - e) + (d1 - (t1 - e)));

		/* The result is t1 + t2, after normalization. */
		head_e1 = t1 + t2;
		tail_e1 = t2 - (head_e1 - t1);
	      }
	      head_prod[0] = head_e1;
	      tail_prod[0] = tail_e1;
	      /* imaginary part */
	      d1 = (double)x_elem[0] * a_elem[1];
	      d2 = (double)x_elem[1] * a_elem[0];
	      {
		/* Compute double-double = double + double. */
		double		e      , t1, t2;

		/* Knuth trick. */
		t1 = d1 + d2;
		e = t1 - d1;
		t2 = ((d2 - e) + (d1 - (t1 - e)));

		/* The result is t1 + t2, after normalization. */
		head_e1 = t1 + t2;
		tail_e1 = t2 - (head_e1 - t1);
	      }
	      head_prod[1] = head_e1;
	      tail_prod[1] = tail_e1;
	    }
	    {
	      double	      head_t, tail_t;
	      double	      head_a, tail_a;
	      double	      head_b, tail_b;
	      /* Real part */
	      head_a = head_sum1[0];
	      tail_a = tail_sum1[0];
	      head_b = head_prod[0];
	      tail_b = tail_prod[0];
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum1[0] = head_t;
	      tail_sum1[0] = tail_t;
	      /* Imaginary part */
	      head_a = head_sum1[1];
	      tail_a = tail_sum1[1];
	      head_b = head_prod[1];
	      tail_b = tail_prod[1];
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum1[1] = head_t;
	      tail_sum1[1] = tail_t;
	    }
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      double	      head_e1, tail_e1;
	      double	      d1;
	      double	      d2;
	      /* Real part */
	      d1 = (double)x_elem[0] * a_elem[0];
	      d2 = (double)-x_elem[1] * a_elem[1];
	      {
		/* Compute double-double = double + double. */
		double		e      , t1, t2;

		/* Knuth trick. */
		t1 = d1 + d2;
		e = t1 - d1;
		t2 = ((d2 - e) + (d1 - (t1 - e)));

		/* The result is t1 + t2, after normalization. */
		head_e1 = t1 + t2;
		tail_e1 = t2 - (head_e1 - t1);
	      }
	      head_prod[0] = head_e1;
	      tail_prod[0] = tail_e1;
	      /* imaginary part */
	      d1 = (double)x_elem[0] * a_elem[1];
	      d2 = (double)x_elem[1] * a_elem[0];
	      {
		/* Compute double-double = double + double. */
		double		e      , t1, t2;

		/* Knuth trick. */
		t1 = d1 + d2;
		e = t1 - d1;
		t2 = ((d2 - e) + (d1 - (t1 - e)));

		/* The result is t1 + t2, after normalization. */
		head_e1 = t1 + t2;
		tail_e1 = t2 - (head_e1 - t1);
	      }
	      head_prod[1] = head_e1;
	      tail_prod[1] = tail_e1;
	    }
	    {
	      double	      head_t, tail_t;
	      double	      head_a, tail_a;
	      double	      head_b, tail_b;
	      /* Real part */
	      head_a = head_sum2[0];
	      tail_a = tail_sum2[0];
	      head_b = head_prod[0];
	      tail_b = tail_prod[0];
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum2[0] = head_t;
	      tail_sum2[0] = tail_t;
	      /* Imaginary part */
	      head_a = head_sum2[1];
	      tail_a = tail_sum2[1];
	      head_b = head_prod[1];
	      tail_b = tail_prod[1];
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum2[1] = head_t;
	      tail_sum2[1] = tail_t;
	    }
	    aij += incaij;
	    jx += incx;
	  }

	} else {
	  for (j = ra - la; j >= 0; j--) {
	    x_elem[0] = head_x_i[jx];
	    x_elem[1] = head_x_i[jx + 1];
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    a_elem[1] = -a_elem[1];
	    {
	      double	      head_e1, tail_e1;
	      double	      d1;
	      double	      d2;
	      /* Real part */
	      d1 = (double)x_elem[0] * a_elem[0];
	      d2 = (double)-x_elem[1] * a_elem[1];
	      {
		/* Compute double-double = double + double. */
		double		e      , t1, t2;

		/* Knuth trick. */
		t1 = d1 + d2;
		e = t1 - d1;
		t2 = ((d2 - e) + (d1 - (t1 - e)));

		/* The result is t1 + t2, after normalization. */
		head_e1 = t1 + t2;
		tail_e1 = t2 - (head_e1 - t1);
	      }
	      head_prod[0] = head_e1;
	      tail_prod[0] = tail_e1;
	      /* imaginary part */
	      d1 = (double)x_elem[0] * a_elem[1];
	      d2 = (double)x_elem[1] * a_elem[0];
	      {
		/* Compute double-double = double + double. */
		double		e      , t1, t2;

		/* Knuth trick. */
		t1 = d1 + d2;
		e = t1 - d1;
		t2 = ((d2 - e) + (d1 - (t1 - e)));

		/* The result is t1 + t2, after normalization. */
		head_e1 = t1 + t2;
		tail_e1 = t2 - (head_e1 - t1);
	      }
	      head_prod[1] = head_e1;
	      tail_prod[1] = tail_e1;
	    }
	    {
	      double	      head_t, tail_t;
	      double	      head_a, tail_a;
	      double	      head_b, tail_b;
	      /* Real part */
	      head_a = head_sum1[0];
	      tail_a = tail_sum1[0];
	      head_b = head_prod[0];
	      tail_b = tail_prod[0];
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum1[0] = head_t;
	      tail_sum1[0] = tail_t;
	      /* Imaginary part */
	      head_a = head_sum1[1];
	      tail_a = tail_sum1[1];
	      head_b = head_prod[1];
	      tail_b = tail_prod[1];
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum1[1] = head_t;
	      tail_sum1[1] = tail_t;
	    }
	    x_elem[0] = tail_x_i[jx];
	    x_elem[1] = tail_x_i[jx + 1];
	    {
	      double	      head_e1, tail_e1;
	      double	      d1;
	      double	      d2;
	      /* Real part */
	      d1 = (double)x_elem[0] * a_elem[0];
	      d2 = (double)-x_elem[1] * a_elem[1];
	      {
		/* Compute double-double = double + double. */
		double		e      , t1, t2;

		/* Knuth trick. */
		t1 = d1 + d2;
		e = t1 - d1;
		t2 = ((d2 - e) + (d1 - (t1 - e)));

		/* The result is t1 + t2, after normalization. */
		head_e1 = t1 + t2;
		tail_e1 = t2 - (head_e1 - t1);
	      }
	      head_prod[0] = head_e1;
	      tail_prod[0] = tail_e1;
	      /* imaginary part */
	      d1 = (double)x_elem[0] * a_elem[1];
	      d2 = (double)x_elem[1] * a_elem[0];
	      {
		/* Compute double-double = double + double. */
		double		e      , t1, t2;

		/* Knuth trick. */
		t1 = d1 + d2;
		e = t1 - d1;
		t2 = ((d2 - e) + (d1 - (t1 - e)));

		/* The result is t1 + t2, after normalization. */
		head_e1 = t1 + t2;
		tail_e1 = t2 - (head_e1 - t1);
	      }
	      head_prod[1] = head_e1;
	      tail_prod[1] = tail_e1;
	    }
	    {
	      double	      head_t, tail_t;
	      double	      head_a, tail_a;
	      double	      head_b, tail_b;
	      /* Real part */
	      head_a = head_sum2[0];
	      tail_a = tail_sum2[0];
	      head_b = head_prod[0];
	      tail_b = tail_prod[0];
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum2[0] = head_t;
	      tail_sum2[0] = tail_t;
	      /* Imaginary part */
	      head_a = head_sum2[1];
	      tail_a = tail_sum2[1];
	      head_b = head_prod[1];
	      tail_b = tail_prod[1];
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_a + head_b;
		bv = s1 - head_a;
		s2 = ((head_b - bv) + (head_a - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_a + tail_b;
		bv = t1 - tail_a;
		t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_t = t1 + t2;
		tail_t = t2 - (head_t - t1);
	      }
	      head_sum2[1] = head_t;
	      tail_sum2[1] = tail_t;
	    }
	    aij += incaij;
	    jx += incx;
	  }
	}

	{
	  double	  cd     [2];
	  cd[0] = (double)alpha_i[0];
	  cd[1] = (double)alpha_i[1];
	  {
	    /* Compute complex-extra = complex-extra * complex-double. */
	    double	    head_a0, tail_a0;
	    double	    head_a1, tail_a1;
	    double	    head_t1, tail_t1;
	    double	    head_t2, tail_t2;
	    head_a0 = head_sum1[0];
	    tail_a0 = tail_sum1[0];
	    head_a1 = head_sum1[1];
	    tail_a1 = tail_sum1[1];
	    /* real part */
	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a0 * split;
	      a11 = con - head_a0;
	      a11 = con - a11;
	      a21 = head_a0 - a11;
	      con = cd[0] * split;
	      b1 = con - cd[0];
	      b1 = con - b1;
	      b2 = cd[0] - b1;

	      c11 = head_a0 * cd[0];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a0 * cd[0];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a1 * split;
	      a11 = con - head_a1;
	      a11 = con - a11;
	      a21 = head_a1 - a11;
	      con = cd[1] * split;
	      b1 = con - cd[1];
	      b1 = con - b1;
	      b2 = cd[1] - b1;

	      c11 = head_a1 * cd[1];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a1 * cd[1];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t2 = t1 + t2;
	      tail_t2 = t2 - (head_t2 - t1);
	    }
	    head_t2 = -head_t2;
	    tail_t2 = -tail_t2;
	    {
	      /* Compute double-double = double-double + double-double. */
	      double	      bv;
	      double	      s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp1[0] = head_t1;
	    tail_tmp1[0] = tail_t1;
	    /* imaginary part */
	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a1 * split;
	      a11 = con - head_a1;
	      a11 = con - a11;
	      a21 = head_a1 - a11;
	      con = cd[0] * split;
	      b1 = con - cd[0];
	      b1 = con - b1;
	      b2 = cd[0] - b1;

	      c11 = head_a1 * cd[0];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a1 * cd[0];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a0 * split;
	      a11 = con - head_a0;
	      a11 = con - a11;
	      a21 = head_a0 - a11;
	      con = cd[1] * split;
	      b1 = con - cd[1];
	      b1 = con - b1;
	      b2 = cd[1] - b1;

	      c11 = head_a0 * cd[1];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a0 * cd[1];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t2 = t1 + t2;
	      tail_t2 = t2 - (head_t2 - t1);
	    }
	    {
	      /* Compute double-double = double-double + double-double. */
	      double	      bv;
	      double	      s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp1[1] = head_t1;
	    tail_tmp1[1] = tail_t1;
	  }

	}
	{
	  double	  cd     [2];
	  cd[0] = (double)alpha_i[0];
	  cd[1] = (double)alpha_i[1];
	  {
	    /* Compute complex-extra = complex-extra * complex-double. */
	    double	    head_a0, tail_a0;
	    double	    head_a1, tail_a1;
	    double	    head_t1, tail_t1;
	    double	    head_t2, tail_t2;
	    head_a0 = head_sum2[0];
	    tail_a0 = tail_sum2[0];
	    head_a1 = head_sum2[1];
	    tail_a1 = tail_sum2[1];
	    /* real part */
	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a0 * split;
	      a11 = con - head_a0;
	      a11 = con - a11;
	      a21 = head_a0 - a11;
	      con = cd[0] * split;
	      b1 = con - cd[0];
	      b1 = con - b1;
	      b2 = cd[0] - b1;

	      c11 = head_a0 * cd[0];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a0 * cd[0];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a1 * split;
	      a11 = con - head_a1;
	      a11 = con - a11;
	      a21 = head_a1 - a11;
	      con = cd[1] * split;
	      b1 = con - cd[1];
	      b1 = con - b1;
	      b2 = cd[1] - b1;

	      c11 = head_a1 * cd[1];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a1 * cd[1];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t2 = t1 + t2;
	      tail_t2 = t2 - (head_t2 - t1);
	    }
	    head_t2 = -head_t2;
	    tail_t2 = -tail_t2;
	    {
	      /* Compute double-double = double-double + double-double. */
	      double	      bv;
	      double	      s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp2[0] = head_t1;
	    tail_tmp2[0] = tail_t1;
	    /* imaginary part */
	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a1 * split;
	      a11 = con - head_a1;
	      a11 = con - a11;
	      a21 = head_a1 - a11;
	      con = cd[0] * split;
	      b1 = con - cd[0];
	      b1 = con - b1;
	      b2 = cd[0] - b1;

	      c11 = head_a1 * cd[0];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a1 * cd[0];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_a0 * split;
	      a11 = con - head_a0;
	      a11 = con - a11;
	      a21 = head_a0 - a11;
	      con = cd[1] * split;
	      b1 = con - cd[1];
	      b1 = con - b1;
	      b2 = cd[1] - b1;

	      c11 = head_a0 * cd[1];
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_a0 * cd[1];
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_t2 = t1 + t2;
	      tail_t2 = t2 - (head_t2 - t1);
	    }
	    {
	      /* Compute double-double = double-double + double-double. */
	      double	      bv;
	      double	      s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_t1 + head_t2;
	      bv = s1 - head_t1;
	      s2 = ((head_t2 - bv) + (head_t1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_t1 + tail_t2;
	      bv = t1 - tail_t1;
	      t2 = ((tail_t2 - bv) + (tail_t1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_t1 = t1 + t2;
	      tail_t1 = t2 - (head_t1 - t1);
	    }
	    head_tmp2[1] = head_t1;
	    tail_tmp2[1] = tail_t1;
	  }

	}
	{
	  double	  head_t, tail_t;
	  double	  head_a, tail_a;
	  double	  head_b, tail_b;
	  /* Real part */
	  head_a = head_tmp1[0];
	  tail_a = tail_tmp1[0];
	  head_b = head_tmp2[0];
	  tail_b = tail_tmp2[0];
	  {
	    /* Compute double-double = double-double + double-double. */
	    double	    bv;
	    double	    s1  , s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_a + head_b;
	    bv = s1 - head_a;
	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_a + tail_b;
	    bv = t1 - tail_a;
	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    head_t = t1 + t2;
	    tail_t = t2 - (head_t - t1);
	  }
	  head_tmp3[0] = head_t;
	  tail_tmp3[0] = tail_t;
	  /* Imaginary part */
	  head_a = head_tmp1[1];
	  tail_a = tail_tmp1[1];
	  head_b = head_tmp2[1];
	  tail_b = tail_tmp2[1];
	  {
	    /* Compute double-double = double-double + double-double. */
	    double	    bv;
	    double	    s1  , s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_a + head_b;
	    bv = s1 - head_a;
	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_a + tail_b;
	    bv = t1 - tail_a;
	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    head_t = t1 + t2;
	    tail_t = t2 - (head_t - t1);
	  }
	  head_tmp3[1] = head_t;
	  tail_tmp3[1] = tail_t;
	}
	y_elem[0] = y_i[iy];
	y_elem[1] = y_i[iy + 1];
	{
	  double	  head_e1, tail_e1;
	  double	  d1;
	  double	  d2;
	  /* Real part */
	  d1 = (double)beta_i[0] * y_elem[0];
	  d2 = (double)-beta_i[1] * y_elem[1];
	  {
	    /* Compute double-double = double + double. */
	    double	    e   , t1, t2;

	    /* Knuth trick. */
	    t1 = d1 + d2;
	    e = t1 - d1;
	    t2 = ((d2 - e) + (d1 - (t1 - e)));

	    /* The result is t1 + t2, after normalization. */
	    head_e1 = t1 + t2;
	    tail_e1 = t2 - (head_e1 - t1);
	  }
	  head_tmp4[0] = head_e1;
	  tail_tmp4[0] = tail_e1;
	  /* imaginary part */
	  d1 = (double)beta_i[0] * y_elem[1];
	  d2 = (double)beta_i[1] * y_elem[0];
	  {
	    /* Compute double-double = double + double. */
	    double	    e   , t1, t2;

	    /* Knuth trick. */
	    t1 = d1 + d2;
	    e = t1 - d1;
	    t2 = ((d2 - e) + (d1 - (t1 - e)));

	    /* The result is t1 + t2, after normalization. */
	    head_e1 = t1 + t2;
	    tail_e1 = t2 - (head_e1 - t1);
	  }
	  head_tmp4[1] = head_e1;
	  tail_tmp4[1] = tail_e1;
	}
	{
	  double	  head_a, tail_a;
	  double	  head_b, tail_b;
	  /* Real part */
	  head_a = head_tmp4[0];
	  tail_a = tail_tmp4[0];
	  head_b = head_tmp3[0];
	  tail_b = tail_tmp3[0];
	  {
	    /* Compute double-double = double-double + double-double. */
	    double	    bv;
	    double	    s1  , s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_a + head_b;
	    bv = s1 - head_a;
	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_a + tail_b;
	    bv = t1 - tail_a;
	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    result[0] = t1 + t2;
	  }
	  /* Imaginary part */
	  head_a = head_tmp4[1];
	  tail_a = tail_tmp4[1];
	  head_b = head_tmp3[1];
	  tail_b = tail_tmp3[1];
	  {
	    /* Compute double-double = double-double + double-double. */
	    double	    bv;
	    double	    s1  , s2, t1, t2;

	    /* Add two hi words. */
	    s1 = head_a + head_b;
	    bv = s1 - head_a;
	    s2 = ((head_b - bv) + (head_a - (s1 - bv)));

	    /* Add two lo words. */
	    t1 = tail_a + tail_b;
	    bv = t1 - tail_a;
	    t2 = ((tail_b - bv) + (tail_a - (t1 - bv)));

	    s2 += t1;

	    /* Renormalize (s1, s2)  to  (t1, s2) */
	    t1 = s1 + s2;
	    s2 = s2 - (t1 - s1);

	    t2 += s2;

	    /* Renormalize (t1, t2)  */
	    result[1] = t1 + t2;
	  }
	}
	y_i[iy] = result[0];
	y_i[iy + 1] = result[1];

	iy += incy;
	if (i >= lbound) {
	  ix0 += incx;
	  ai += incai2;
	  la++;
	} else {
	  ai += incai1;
	}
	if (i < rbound) {
	  ra++;
	}
      }

      FPU_FIX_STOP;
    }
    break;
  }
}				/* end BLAS_cgbmv2_x */
Exemplo n.º 19
0
void		BLAS_dgemv2_d_s_x(enum blas_order_type order, enum blas_trans_type trans,
					int		m       , int n, double alpha, const double *a, int lda,
		const		float *head_x, const float *tail_x, int incx,
					double	beta  , double *y, int incy,
					enum		blas_prec_type prec)
/*
 * Purpose
 * =======
 *
 * Computes y = alpha * op(A) * head_x + alpha * op(A) * tail_x + beta * y,
 * where A is a general matrix.
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of A; row or column major
 *
 * trans        (input) blas_trans_type
 *              Transpose of A: no trans, trans, or conjugate trans
 *
 * m            (input) int
 *              Dimension of A
 *
 * n            (input) int
 *              Dimension of A and the length of vector x and z
 *
 * alpha        (input) double
 *
 * A            (input) const double*
 *
 * lda          (input) int
 *              Leading dimension of A
 *
 * head_x
 * tail_x       (input) const float*
 *
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) double
 *
 * y            (input) const double*
 *
 * incy         (input) int
 *              The stride for vector y.
 *
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 */
{
  static const char routine_name[] = "BLAS_dgemv2_d_s_x";
  switch (prec) {
  case blas_prec_single:
  case blas_prec_double:
  case blas_prec_indigenous:{

      int	      i    , j;
      int	      iy   , jx, kx, ky;
      int	      lenx , leny;
      int	      ai   , aij;
      int	      incai, incaij;

      const double   *a_i = a;
      const float    *head_x_i = head_x;
      const float    *tail_x_i = tail_x;
      double         *y_i = y;
      double	      alpha_i = alpha;
      double	      beta_i = beta;
      double	      a_elem;
      float	      x_elem;
      double	      y_elem;
      double	      prod;
      double	      sum;
      double	      sum2;
      double	      tmp1;
      double	      tmp2;


      /* all error calls */
      if (m < 0)
	BLAS_error(routine_name, -3, m, 0);
      else if (n <= 0)
	BLAS_error(routine_name, -4, n, 0);
      else if (incx == 0)
	BLAS_error(routine_name, -10, incx, 0);
      else if (incy == 0)
	BLAS_error(routine_name, -13, incy, 0);

      if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
	lenx = n;
	leny = m;
	incai = lda;
	incaij = 1;
      } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) {
	lenx = m;
	leny = n;
	incai = 1;
	incaij = lda;
      } else if ((order == blas_colmajor) && (trans == blas_no_trans)) {
	lenx = n;
	leny = m;
	incai = 1;
	incaij = lda;
      } else {			/* colmajor and blas_trans */
	lenx = m;
	leny = n;
	incai = lda;
	incaij = 1;
      }

      if (lda < leny)
	BLAS_error(routine_name, -7, lda, NULL);








      if (incx > 0)
	kx = 0;
      else
	kx = (1 - lenx) * incx;
      if (incy > 0)
	ky = 0;
      else
	ky = (1 - leny) * incy;

      /* No extra-precision needed for alpha = 0 */
      if (alpha_i == 0.0) {
	if (beta_i == 0.0) {
	  iy = ky;
	  for (i = 0; i < leny; i++) {
	    y_i[iy] = 0.0;
	    iy += incy;
	  }
	} else if (!(beta_i == 0.0)) {
	  iy = ky;
	  for (i = 0; i < leny; i++) {
	    y_elem = y_i[iy];
	    tmp1 = y_elem * beta_i;
	    y_i[iy] = tmp1;
	    iy += incy;
	  }
	}
      } else {			/* alpha != 0 */

	/*
	 * if beta = 0, we can save m multiplies: y = alpha*A*head_x +
	 * alpha*A*tail_x
	 */
	if (beta_i == 0.0) {
	  if (alpha_i == 1.0) {
	    /* save m more multiplies if alpha = 1 */
	    ai = 0;
	    iy = ky;
	    for (i = 0; i < leny; i++) {
	      sum = 0.0;
	      sum2 = 0.0;
	      aij = ai;
	      jx = kx;
	      for (j = 0; j < lenx; j++) {
		a_elem = a_i[aij];

		x_elem = head_x_i[jx];
		prod = a_elem * x_elem;
		sum = sum + prod;
		x_elem = tail_x_i[jx];
		prod = a_elem * x_elem;
		sum2 = sum2 + prod;
		aij += incaij;
		jx += incx;
	      }
	      sum = sum + sum2;
	      y_i[iy] = sum;
	      ai += incai;
	      iy += incy;
	    }			/* end for */
	  } else {		/* alpha != 1 */
	    ai = 0;
	    iy = ky;
	    for (i = 0; i < leny; i++) {
	      sum = 0.0;
	      sum2 = 0.0;
	      aij = ai;
	      jx = kx;
	      for (j = 0; j < lenx; j++) {
		a_elem = a_i[aij];

		x_elem = head_x_i[jx];
		prod = a_elem * x_elem;
		sum = sum + prod;
		x_elem = tail_x_i[jx];
		prod = a_elem * x_elem;
		sum2 = sum2 + prod;
		aij += incaij;
		jx += incx;
	      }
	      tmp1 = sum * alpha_i;
	      tmp2 = sum2 * alpha_i;
	      tmp1 = tmp1 + tmp2;
	      y_i[iy] = tmp1;
	      ai += incai;
	      iy += incy;
	    }
	  }
	} else {		/* beta != 0 */
	  if (alpha_i == 1.0) {
	    /* save m multiplies if alpha = 1 */
	    ai = 0;
	    iy = ky;
	    for (i = 0; i < leny; i++) {
	      sum = 0.0;;
	      sum2 = 0.0;;
	      aij = ai;
	      jx = kx;
	      for (j = 0; j < lenx; j++) {
		a_elem = a_i[aij];

		x_elem = head_x_i[jx];
		prod = a_elem * x_elem;
		sum = sum + prod;
		x_elem = tail_x_i[jx];
		prod = a_elem * x_elem;
		sum2 = sum2 + prod;
		aij += incaij;
		jx += incx;
	      }
	      sum = sum + sum2;
	      y_elem = y_i[iy];
	      tmp1 = y_elem * beta_i;
	      tmp2 = sum + tmp1;
	      y_i[iy] = tmp2;
	      ai += incai;
	      iy += incy;
	    }
	  } else {		/* alpha != 1, the most general form: y =
				 * alpha*A*head_x + alpha*A*tail_x + beta*y */
	    ai = 0;
	    iy = ky;
	    for (i = 0; i < leny; i++) {
	      sum = 0.0;;
	      sum2 = 0.0;;
	      aij = ai;
	      jx = kx;
	      for (j = 0; j < lenx; j++) {
		a_elem = a_i[aij];

		x_elem = head_x_i[jx];
		prod = a_elem * x_elem;
		sum = sum + prod;
		x_elem = tail_x_i[jx];
		prod = a_elem * x_elem;
		sum2 = sum2 + prod;
		aij += incaij;
		jx += incx;
	      }
	      tmp1 = sum * alpha_i;
	      tmp2 = sum2 * alpha_i;
	      tmp1 = tmp1 + tmp2;
	      y_elem = y_i[iy];
	      tmp2 = y_elem * beta_i;
	      tmp1 = tmp1 + tmp2;
	      y_i[iy] = tmp1;
	      ai += incai;
	      iy += incy;
	    }
	  }
	}

      }



      break;
    }
  case blas_prec_extra:{

      int	      i    , j;
      int	      iy   , jx, kx, ky;
      int	      lenx , leny;
      int	      ai   , aij;
      int	      incai, incaij;

      const double   *a_i = a;
      const float    *head_x_i = head_x;
      const float    *tail_x_i = tail_x;
      double         *y_i = y;
      double	      alpha_i = alpha;
      double	      beta_i = beta;
      double	      a_elem;
      float	      x_elem;
      double	      y_elem;
      double	      head_prod, tail_prod;
      double	      head_sum, tail_sum;
      double	      head_sum2, tail_sum2;
      double	      head_tmp1, tail_tmp1;
      double	      head_tmp2, tail_tmp2;
      FPU_FIX_DECL;

      /* all error calls */
      if (m < 0)
	BLAS_error(routine_name, -3, m, 0);
      else if (n <= 0)
	BLAS_error(routine_name, -4, n, 0);
      else if (incx == 0)
	BLAS_error(routine_name, -10, incx, 0);
      else if (incy == 0)
	BLAS_error(routine_name, -13, incy, 0);

      if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
	lenx = n;
	leny = m;
	incai = lda;
	incaij = 1;
      } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) {
	lenx = m;
	leny = n;
	incai = 1;
	incaij = lda;
      } else if ((order == blas_colmajor) && (trans == blas_no_trans)) {
	lenx = n;
	leny = m;
	incai = 1;
	incaij = lda;
      } else {			/* colmajor and blas_trans */
	lenx = m;
	leny = n;
	incai = lda;
	incaij = 1;
      }

      if (lda < leny)
	BLAS_error(routine_name, -7, lda, NULL);

      FPU_FIX_START;






      if (incx > 0)
	kx = 0;
      else
	kx = (1 - lenx) * incx;
      if (incy > 0)
	ky = 0;
      else
	ky = (1 - leny) * incy;

      /* No extra-precision needed for alpha = 0 */
      if (alpha_i == 0.0) {
	if (beta_i == 0.0) {
	  iy = ky;
	  for (i = 0; i < leny; i++) {
	    y_i[iy] = 0.0;
	    iy += incy;
	  }
	} else if (!(beta_i == 0.0)) {
	  iy = ky;
	  for (i = 0; i < leny; i++) {
	    y_elem = y_i[iy];
	    {
	      /* Compute double_double = double * double. */
	      double	      a1, a2, b1, b2, con;

	      con = y_elem * split;
	      a1 = con - y_elem;
	      a1 = con - a1;
	      a2 = y_elem - a1;
	      con = beta_i * split;
	      b1 = con - beta_i;
	      b1 = con - b1;
	      b2 = beta_i - b1;

	      head_tmp1 = y_elem * beta_i;
	      tail_tmp1 = (((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    y_i[iy] = head_tmp1;
	    iy += incy;
	  }
	}
      } else {			/* alpha != 0 */

	/*
	 * if beta = 0, we can save m multiplies: y = alpha*A*head_x +
	 * alpha*A*tail_x
	 */
	if (beta_i == 0.0) {
	  if (alpha_i == 1.0) {
	    /* save m more multiplies if alpha = 1 */
	    ai = 0;
	    iy = ky;
	    for (i = 0; i < leny; i++) {
	      head_sum = tail_sum = 0.0;
	      head_sum2 = tail_sum2 = 0.0;
	      aij = ai;
	      jx = kx;
	      for (j = 0; j < lenx; j++) {
		a_elem = a_i[aij];

		x_elem = head_x_i[jx];
		{
		  double	  dt = (double)x_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = a_elem * split;
		    a1 = con - a_elem;
		    a1 = con - a1;
		    a2 = a_elem - a1;
		    con = dt * split;
		    b1 = con - dt;
		    b1 = con - b1;
		    b2 = dt - b1;

		    head_prod = a_elem * dt;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum + head_prod;
		  bv = s1 - head_sum;
		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum + tail_prod;
		  bv = t1 - tail_sum;
		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum = t1 + t2;
		  tail_sum = t2 - (head_sum - t1);
		}
		x_elem = tail_x_i[jx];
		{
		  double	  dt = (double)x_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = a_elem * split;
		    a1 = con - a_elem;
		    a1 = con - a1;
		    a2 = a_elem - a1;
		    con = dt * split;
		    b1 = con - dt;
		    b1 = con - b1;
		    b2 = dt - b1;

		    head_prod = a_elem * dt;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum2 + head_prod;
		  bv = s1 - head_sum2;
		  s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum2 + tail_prod;
		  bv = t1 - tail_sum2;
		  t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum2 = t1 + t2;
		  tail_sum2 = t2 - (head_sum2 - t1);
		}
		aij += incaij;
		jx += incx;
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_sum + head_sum2;
		bv = s1 - head_sum;
		s2 = ((head_sum2 - bv) + (head_sum - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_sum + tail_sum2;
		bv = t1 - tail_sum;
		t2 = ((tail_sum2 - bv) + (tail_sum - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_sum = t1 + t2;
		tail_sum = t2 - (head_sum - t1);
	      }
	      y_i[iy] = head_sum;
	      ai += incai;
	      iy += incy;
	    }			/* end for */
	  } else {		/* alpha != 1 */
	    ai = 0;
	    iy = ky;
	    for (i = 0; i < leny; i++) {
	      head_sum = tail_sum = 0.0;
	      head_sum2 = tail_sum2 = 0.0;
	      aij = ai;
	      jx = kx;
	      for (j = 0; j < lenx; j++) {
		a_elem = a_i[aij];

		x_elem = head_x_i[jx];
		{
		  double	  dt = (double)x_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = a_elem * split;
		    a1 = con - a_elem;
		    a1 = con - a1;
		    a2 = a_elem - a1;
		    con = dt * split;
		    b1 = con - dt;
		    b1 = con - b1;
		    b2 = dt - b1;

		    head_prod = a_elem * dt;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum + head_prod;
		  bv = s1 - head_sum;
		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum + tail_prod;
		  bv = t1 - tail_sum;
		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum = t1 + t2;
		  tail_sum = t2 - (head_sum - t1);
		}
		x_elem = tail_x_i[jx];
		{
		  double	  dt = (double)x_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = a_elem * split;
		    a1 = con - a_elem;
		    a1 = con - a1;
		    a2 = a_elem - a1;
		    con = dt * split;
		    b1 = con - dt;
		    b1 = con - b1;
		    b2 = dt - b1;

		    head_prod = a_elem * dt;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum2 + head_prod;
		  bv = s1 - head_sum2;
		  s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum2 + tail_prod;
		  bv = t1 - tail_sum2;
		  t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum2 = t1 + t2;
		  tail_sum2 = t2 - (head_sum2 - t1);
		}
		aij += incaij;
		jx += incx;
	      }
	      {
		/* Compute double-double = double-double * double. */
		double		a11    , a21, b1, b2, c11, c21, c2, con, t1, t2;

		con = head_sum * split;
		a11 = con - head_sum;
		a11 = con - a11;
		a21 = head_sum - a11;
		con = alpha_i * split;
		b1 = con - alpha_i;
		b1 = con - b1;
		b2 = alpha_i - b1;

		c11 = head_sum * alpha_i;
		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		c2 = tail_sum * alpha_i;
		t1 = c11 + c2;
		t2 = (c2 - (t1 - c11)) + c21;

		head_tmp1 = t1 + t2;
		tail_tmp1 = t2 - (head_tmp1 - t1);
	      }
	      {
		/* Compute double-double = double-double * double. */
		double		a11    , a21, b1, b2, c11, c21, c2, con, t1, t2;

		con = head_sum2 * split;
		a11 = con - head_sum2;
		a11 = con - a11;
		a21 = head_sum2 - a11;
		con = alpha_i * split;
		b1 = con - alpha_i;
		b1 = con - b1;
		b2 = alpha_i - b1;

		c11 = head_sum2 * alpha_i;
		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		c2 = tail_sum2 * alpha_i;
		t1 = c11 + c2;
		t2 = (c2 - (t1 - c11)) + c21;

		head_tmp2 = t1 + t2;
		tail_tmp2 = t2 - (head_tmp2 - t1);
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_tmp1 + head_tmp2;
		bv = s1 - head_tmp1;
		s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_tmp1 + tail_tmp2;
		bv = t1 - tail_tmp1;
		t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_tmp1 = t1 + t2;
		tail_tmp1 = t2 - (head_tmp1 - t1);
	      }
	      y_i[iy] = head_tmp1;
	      ai += incai;
	      iy += incy;
	    }
	  }
	} else {		/* beta != 0 */
	  if (alpha_i == 1.0) {
	    /* save m multiplies if alpha = 1 */
	    ai = 0;
	    iy = ky;
	    for (i = 0; i < leny; i++) {
	      head_sum = tail_sum = 0.0;;
	      head_sum2 = tail_sum2 = 0.0;;
	      aij = ai;
	      jx = kx;
	      for (j = 0; j < lenx; j++) {
		a_elem = a_i[aij];

		x_elem = head_x_i[jx];
		{
		  double	  dt = (double)x_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = a_elem * split;
		    a1 = con - a_elem;
		    a1 = con - a1;
		    a2 = a_elem - a1;
		    con = dt * split;
		    b1 = con - dt;
		    b1 = con - b1;
		    b2 = dt - b1;

		    head_prod = a_elem * dt;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum + head_prod;
		  bv = s1 - head_sum;
		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum + tail_prod;
		  bv = t1 - tail_sum;
		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum = t1 + t2;
		  tail_sum = t2 - (head_sum - t1);
		}
		x_elem = tail_x_i[jx];
		{
		  double	  dt = (double)x_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = a_elem * split;
		    a1 = con - a_elem;
		    a1 = con - a1;
		    a2 = a_elem - a1;
		    con = dt * split;
		    b1 = con - dt;
		    b1 = con - b1;
		    b2 = dt - b1;

		    head_prod = a_elem * dt;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum2 + head_prod;
		  bv = s1 - head_sum2;
		  s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum2 + tail_prod;
		  bv = t1 - tail_sum2;
		  t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum2 = t1 + t2;
		  tail_sum2 = t2 - (head_sum2 - t1);
		}
		aij += incaij;
		jx += incx;
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_sum + head_sum2;
		bv = s1 - head_sum;
		s2 = ((head_sum2 - bv) + (head_sum - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_sum + tail_sum2;
		bv = t1 - tail_sum;
		t2 = ((tail_sum2 - bv) + (tail_sum - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_sum = t1 + t2;
		tail_sum = t2 - (head_sum - t1);
	      }
	      y_elem = y_i[iy];
	      {
		/* Compute double_double = double * double. */
		double		a1     , a2, b1, b2, con;

		con = y_elem * split;
		a1 = con - y_elem;
		a1 = con - a1;
		a2 = y_elem - a1;
		con = beta_i * split;
		b1 = con - beta_i;
		b1 = con - b1;
		b2 = beta_i - b1;

		head_tmp1 = y_elem * beta_i;
		tail_tmp1 = (((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_sum + head_tmp1;
		bv = s1 - head_sum;
		s2 = ((head_tmp1 - bv) + (head_sum - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_sum + tail_tmp1;
		bv = t1 - tail_sum;
		t2 = ((tail_tmp1 - bv) + (tail_sum - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_tmp2 = t1 + t2;
		tail_tmp2 = t2 - (head_tmp2 - t1);
	      }
	      y_i[iy] = head_tmp2;
	      ai += incai;
	      iy += incy;
	    }
	  } else {		/* alpha != 1, the most general form: y =
				 * alpha*A*head_x + alpha*A*tail_x + beta*y */
	    ai = 0;
	    iy = ky;
	    for (i = 0; i < leny; i++) {
	      head_sum = tail_sum = 0.0;;
	      head_sum2 = tail_sum2 = 0.0;;
	      aij = ai;
	      jx = kx;
	      for (j = 0; j < lenx; j++) {
		a_elem = a_i[aij];

		x_elem = head_x_i[jx];
		{
		  double	  dt = (double)x_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = a_elem * split;
		    a1 = con - a_elem;
		    a1 = con - a1;
		    a2 = a_elem - a1;
		    con = dt * split;
		    b1 = con - dt;
		    b1 = con - b1;
		    b2 = dt - b1;

		    head_prod = a_elem * dt;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum + head_prod;
		  bv = s1 - head_sum;
		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum + tail_prod;
		  bv = t1 - tail_sum;
		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum = t1 + t2;
		  tail_sum = t2 - (head_sum - t1);
		}
		x_elem = tail_x_i[jx];
		{
		  double	  dt = (double)x_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = a_elem * split;
		    a1 = con - a_elem;
		    a1 = con - a1;
		    a2 = a_elem - a1;
		    con = dt * split;
		    b1 = con - dt;
		    b1 = con - b1;
		    b2 = dt - b1;

		    head_prod = a_elem * dt;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum2 + head_prod;
		  bv = s1 - head_sum2;
		  s2 = ((head_prod - bv) + (head_sum2 - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum2 + tail_prod;
		  bv = t1 - tail_sum2;
		  t2 = ((tail_prod - bv) + (tail_sum2 - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum2 = t1 + t2;
		  tail_sum2 = t2 - (head_sum2 - t1);
		}
		aij += incaij;
		jx += incx;
	      }
	      {
		/* Compute double-double = double-double * double. */
		double		a11    , a21, b1, b2, c11, c21, c2, con, t1, t2;

		con = head_sum * split;
		a11 = con - head_sum;
		a11 = con - a11;
		a21 = head_sum - a11;
		con = alpha_i * split;
		b1 = con - alpha_i;
		b1 = con - b1;
		b2 = alpha_i - b1;

		c11 = head_sum * alpha_i;
		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		c2 = tail_sum * alpha_i;
		t1 = c11 + c2;
		t2 = (c2 - (t1 - c11)) + c21;

		head_tmp1 = t1 + t2;
		tail_tmp1 = t2 - (head_tmp1 - t1);
	      }
	      {
		/* Compute double-double = double-double * double. */
		double		a11    , a21, b1, b2, c11, c21, c2, con, t1, t2;

		con = head_sum2 * split;
		a11 = con - head_sum2;
		a11 = con - a11;
		a21 = head_sum2 - a11;
		con = alpha_i * split;
		b1 = con - alpha_i;
		b1 = con - b1;
		b2 = alpha_i - b1;

		c11 = head_sum2 * alpha_i;
		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		c2 = tail_sum2 * alpha_i;
		t1 = c11 + c2;
		t2 = (c2 - (t1 - c11)) + c21;

		head_tmp2 = t1 + t2;
		tail_tmp2 = t2 - (head_tmp2 - t1);
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_tmp1 + head_tmp2;
		bv = s1 - head_tmp1;
		s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_tmp1 + tail_tmp2;
		bv = t1 - tail_tmp1;
		t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_tmp1 = t1 + t2;
		tail_tmp1 = t2 - (head_tmp1 - t1);
	      }
	      y_elem = y_i[iy];
	      {
		/* Compute double_double = double * double. */
		double		a1     , a2, b1, b2, con;

		con = y_elem * split;
		a1 = con - y_elem;
		a1 = con - a1;
		a2 = y_elem - a1;
		con = beta_i * split;
		b1 = con - beta_i;
		b1 = con - b1;
		b2 = beta_i - b1;

		head_tmp2 = y_elem * beta_i;
		tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_tmp1 + head_tmp2;
		bv = s1 - head_tmp1;
		s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_tmp1 + tail_tmp2;
		bv = t1 - tail_tmp1;
		t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_tmp1 = t1 + t2;
		tail_tmp1 = t2 - (head_tmp1 - t1);
	      }
	      y_i[iy] = head_tmp1;
	      ai += incai;
	      iy += incy;
	    }
	  }
	}

      }

      FPU_FIX_STOP;
    }
    break;
  }
}
Exemplo n.º 20
0
/*
 * Purpose
 * =======
 *
 * Computes y = alpha * ap * x + beta * y, where ap is a symmetric
 * packed matrix.
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of ap; row or column major
 *
 * uplo         (input) blas_uplo_type
 *              Whether ap is upper or lower
 *
 * n            (input) int
 *              Dimension of ap and the length of vector x
 *
 * alpha        (input) const void*
 *
 * ap           (input) double*
 *
 * x            (input) void*
 *
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) const void*
 *
 * y            (input/output) void*
 *
 * incy         (input) int
 *              The stride for vector y.
 *
 */
void		BLAS_zspmv_d_z(enum blas_order_type order, enum blas_uplo_type uplo,
		     		int		n       , const void *alpha, const double *ap,
	    		const		void  *x, int incx, const void *beta,
		     		void         *y, int incy)
{
  static const char routine_name[] = "BLAS_zspmv_d_z";

  {
    int		    matrix_row, step, ap_index, ap_start, x_index, x_start;
    int		    y_start, y_index, incap;
    double         *alpha_i = (double *)alpha;
    double         *beta_i = (double *)beta;

    const double   *ap_i = ap;
    const double   *x_i = (double *)x;
    double         *y_i = (double *)y;
    double	    rowsum[2];
    double	    rowtmp[2];
    double	    matval;
    double	    vecval[2];
    double	    resval[2];
    double	    tmp1 [2];
    double	    tmp2 [2];


    incap = 1;

    incx *= 2;
    incy *= 2;

    if (incx < 0)
      x_start = (-n + 1) * incx;
    else
      x_start = 0;
    if (incy < 0)
      y_start = (-n + 1) * incy;
    else
      y_start = 0;

    if (n < 1) {
      return;
    }
    if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
      return;
    }
    /* Check for error conditions. */
    if (order != blas_colmajor && order != blas_rowmajor) {
      BLAS_error(routine_name, -1, order, NULL);
    }
    if (uplo != blas_upper && uplo != blas_lower) {
      BLAS_error(routine_name, -2, uplo, NULL);
    }
    if (incx == 0) {
      BLAS_error(routine_name, -7, incx, NULL);
    }
    if (incy == 0) {
      BLAS_error(routine_name, -10, incy, NULL);
    }
    if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
      {
	y_index = y_start;
	for (matrix_row = 0; matrix_row < n; matrix_row++) {
	  resval[0] = y_i[y_index];
	  resval[1] = y_i[y_index + 1];

	  {
	    tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1];
	    tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0];
	  }

	  y_i[y_index] = tmp2[0];
	  y_i[y_index + 1] = tmp2[1];

	  y_index += incy;
	}
      }
    } else {
      if (uplo == blas_lower)
	order = (order == blas_rowmajor) ? blas_colmajor : blas_rowmajor;
      if (order == blas_rowmajor) {
	if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	  if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	    {
	      y_index = y_start;
	      ap_start = 0;
	      for (matrix_row = 0; matrix_row < n; matrix_row++) {
		x_index = x_start;
		ap_index = ap_start;
		rowsum[0] = rowsum[1] = 0.0;
		rowtmp[0] = rowtmp[1] = 0.0;
		for (step = 0; step < matrix_row; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += (n - step - 1) * incap;
		  x_index += incx;
		}
		for (step = matrix_row; step < n; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += incap;
		  x_index += incx;
		}
		tmp1[0] = rowsum[0];
		tmp1[1] = rowsum[1];
		y_i[y_index] = tmp1[0];
		y_i[y_index + 1] = tmp1[1];

		y_index += incy;
		ap_start += incap;
	      }
	    }
	  } else {
	    {
	      y_index = y_start;
	      ap_start = 0;
	      for (matrix_row = 0; matrix_row < n; matrix_row++) {
		x_index = x_start;
		ap_index = ap_start;
		rowsum[0] = rowsum[1] = 0.0;
		rowtmp[0] = rowtmp[1] = 0.0;
		for (step = 0; step < matrix_row; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += (n - step - 1) * incap;
		  x_index += incx;
		}
		for (step = matrix_row; step < n; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += incap;
		  x_index += incx;
		}
		resval[0] = y_i[y_index];
		resval[1] = y_i[y_index + 1];
		tmp1[0] = rowsum[0];
		tmp1[1] = rowsum[1];
		{
		  tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1];
		  tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0];
		}
		tmp2[0] = tmp1[0] + tmp2[0];
		tmp2[1] = tmp1[1] + tmp2[1];
		y_i[y_index] = tmp2[0];
		y_i[y_index + 1] = tmp2[1];

		y_index += incy;
		ap_start += incap;
	      }
	    }
	  }
	} else {
	  if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	    {
	      y_index = y_start;
	      ap_start = 0;
	      for (matrix_row = 0; matrix_row < n; matrix_row++) {
		x_index = x_start;
		ap_index = ap_start;
		rowsum[0] = rowsum[1] = 0.0;
		rowtmp[0] = rowtmp[1] = 0.0;
		for (step = 0; step < matrix_row; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += (n - step - 1) * incap;
		  x_index += incx;
		}
		for (step = matrix_row; step < n; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += incap;
		  x_index += incx;
		}
		{
		  tmp1[0] = (double)rowsum[0] * alpha_i[0] - (double)rowsum[1] * alpha_i[1];
		  tmp1[1] = (double)rowsum[0] * alpha_i[1] + (double)rowsum[1] * alpha_i[0];
		}
		y_i[y_index] = tmp1[0];
		y_i[y_index + 1] = tmp1[1];

		y_index += incy;
		ap_start += incap;
	      }
	    }
	  } else {
	    {
	      y_index = y_start;
	      ap_start = 0;
	      for (matrix_row = 0; matrix_row < n; matrix_row++) {
		x_index = x_start;
		ap_index = ap_start;
		rowsum[0] = rowsum[1] = 0.0;
		rowtmp[0] = rowtmp[1] = 0.0;
		for (step = 0; step < matrix_row; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += (n - step - 1) * incap;
		  x_index += incx;
		}
		for (step = matrix_row; step < n; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += incap;
		  x_index += incx;
		}
		resval[0] = y_i[y_index];
		resval[1] = y_i[y_index + 1];
		{
		  tmp1[0] = (double)rowsum[0] * alpha_i[0] - (double)rowsum[1] * alpha_i[1];
		  tmp1[1] = (double)rowsum[0] * alpha_i[1] + (double)rowsum[1] * alpha_i[0];
		}
		{
		  tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1];
		  tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0];
		}
		tmp2[0] = tmp1[0] + tmp2[0];
		tmp2[1] = tmp1[1] + tmp2[1];
		y_i[y_index] = tmp2[0];
		y_i[y_index + 1] = tmp2[1];

		y_index += incy;
		ap_start += incap;
	      }
	    }
	  }
	}
      } else {
	if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	  if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	    {
	      y_index = y_start;
	      ap_start = 0;
	      for (matrix_row = 0; matrix_row < n; matrix_row++) {
		x_index = x_start;
		ap_index = ap_start;
		rowsum[0] = rowsum[1] = 0.0;
		rowtmp[0] = rowtmp[1] = 0.0;
		for (step = 0; step < matrix_row; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += incap;
		  x_index += incx;
		}
		for (step = matrix_row; step < n; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += (step + 1) * incap;
		  x_index += incx;
		}
		tmp1[0] = rowsum[0];
		tmp1[1] = rowsum[1];
		y_i[y_index] = tmp1[0];
		y_i[y_index + 1] = tmp1[1];

		y_index += incy;
		ap_start += (matrix_row + 1) * incap;
	      }
	    }
	  } else {
	    {
	      y_index = y_start;
	      ap_start = 0;
	      for (matrix_row = 0; matrix_row < n; matrix_row++) {
		x_index = x_start;
		ap_index = ap_start;
		rowsum[0] = rowsum[1] = 0.0;
		rowtmp[0] = rowtmp[1] = 0.0;
		for (step = 0; step < matrix_row; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += incap;
		  x_index += incx;
		}
		for (step = matrix_row; step < n; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += (step + 1) * incap;
		  x_index += incx;
		}
		resval[0] = y_i[y_index];
		resval[1] = y_i[y_index + 1];
		tmp1[0] = rowsum[0];
		tmp1[1] = rowsum[1];
		{
		  tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1];
		  tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0];
		}
		tmp2[0] = tmp1[0] + tmp2[0];
		tmp2[1] = tmp1[1] + tmp2[1];
		y_i[y_index] = tmp2[0];
		y_i[y_index + 1] = tmp2[1];

		y_index += incy;
		ap_start += (matrix_row + 1) * incap;
	      }
	    }
	  }
	} else {
	  if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	    {
	      y_index = y_start;
	      ap_start = 0;
	      for (matrix_row = 0; matrix_row < n; matrix_row++) {
		x_index = x_start;
		ap_index = ap_start;
		rowsum[0] = rowsum[1] = 0.0;
		rowtmp[0] = rowtmp[1] = 0.0;
		for (step = 0; step < matrix_row; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += incap;
		  x_index += incx;
		}
		for (step = matrix_row; step < n; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += (step + 1) * incap;
		  x_index += incx;
		}
		{
		  tmp1[0] = (double)rowsum[0] * alpha_i[0] - (double)rowsum[1] * alpha_i[1];
		  tmp1[1] = (double)rowsum[0] * alpha_i[1] + (double)rowsum[1] * alpha_i[0];
		}
		y_i[y_index] = tmp1[0];
		y_i[y_index + 1] = tmp1[1];

		y_index += incy;
		ap_start += (matrix_row + 1) * incap;
	      }
	    }
	  } else {
	    {
	      y_index = y_start;
	      ap_start = 0;
	      for (matrix_row = 0; matrix_row < n; matrix_row++) {
		x_index = x_start;
		ap_index = ap_start;
		rowsum[0] = rowsum[1] = 0.0;
		rowtmp[0] = rowtmp[1] = 0.0;
		for (step = 0; step < matrix_row; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += incap;
		  x_index += incx;
		}
		for (step = matrix_row; step < n; step++) {
		  matval = ap_i[ap_index];
		  vecval[0] = x_i[x_index];
		  vecval[1] = x_i[x_index + 1];
		  {
		    rowtmp[0] = vecval[0] * matval;
		    rowtmp[1] = vecval[1] * matval;
		  }
		  rowsum[0] = rowsum[0] + rowtmp[0];
		  rowsum[1] = rowsum[1] + rowtmp[1];
		  ap_index += (step + 1) * incap;
		  x_index += incx;
		}
		resval[0] = y_i[y_index];
		resval[1] = y_i[y_index + 1];
		{
		  tmp1[0] = (double)rowsum[0] * alpha_i[0] - (double)rowsum[1] * alpha_i[1];
		  tmp1[1] = (double)rowsum[0] * alpha_i[1] + (double)rowsum[1] * alpha_i[0];
		}
		{
		  tmp2[0] = (double)beta_i[0] * resval[0] - (double)beta_i[1] * resval[1];
		  tmp2[1] = (double)beta_i[0] * resval[1] + (double)beta_i[1] * resval[0];
		}
		tmp2[0] = tmp1[0] + tmp2[0];
		tmp2[1] = tmp1[1] + tmp2[1];
		y_i[y_index] = tmp2[0];
		y_i[y_index + 1] = tmp2[1];

		y_index += incy;
		ap_start += (matrix_row + 1) * incap;
	      }
	    }
	  }
	}
      }				/* if order == ... */
    }				/* alpha != 0 */


  }
}
Exemplo n.º 21
0
void		BLAS_dgemm_s_d(enum blas_order_type order, enum blas_trans_type transa,
     		enum		blas_trans_type transb, int m, int n, int k,
		     		double	alpha , const float *a, int lda, const double *b, int ldb,
		     		double	beta  , double *c, int ldc)
/*
 * Purpose
 * =======
 *
 * This routine computes the matrix product:
 *
 *      C   <-  alpha * op(A) * op(B)  +  beta * C .
 *
 * where op(M) represents either M, M transpose,
 * or M conjugate transpose.
 *
 * Arguments
 * =========
 *
 * order   (input) enum blas_order_type
 *         Storage format of input matrices A, B, and C.
 *
 * transa  (input) enum blas_trans_type
 *         Operation to be done on matrix A before multiplication.
 *         Can be no operation, transposition, or conjugate transposition.
 *
 * transb  (input) enum blas_trans_type
 *         Operation to be done on matrix B before multiplication.
 *         Can be no operation, transposition, or conjugate transposition.
 *
 * m n k   (input) int
 *         The dimensions of matrices A, B, and C.
 *         Matrix C is m-by-n matrix.
 *         Matrix A is m-by-k if A is not transposed,
 *                     k-by-m otherwise.
 *         Matrix B is k-by-n if B is not transposed,
 *                     n-by-k otherwise.
 *
 * alpha   (input) double
 *
 * a       (input) const float*
 *         matrix A.
 *
 * lda     (input) int
 *         leading dimension of A.
 *
 * b       (input) const double*
 *         matrix B
 *
 * ldb     (input) int
 *         leading dimension of B.
 *
 * beta    (input) double
 *
 * c       (input/output) double*
 *         matrix C
 *
 * ldc     (input) int
 *         leading dimension of C.
 *
 */
{
  static const char routine_name[] = "BLAS_dgemm_s_d";


  /* Integer Index Variables */
  int		  i       , j, h;

  int		  ai      , bj, ci;
  int		  aih     , bhj, cij;	/* Index into matrices a, b, c during
					 * multiply */

  int		  incai   , incaih;	/* Index increments for matrix a */
  int		  incbj   , incbhj;	/* Index increments for matrix b */
  int		  incci   , inccij;	/* Index increments for matrix c */

  /* Input Matrices */
  const float    *a_i = a;
  const double   *b_i = b;

  /* Output Matrix */
  double         *c_i = c;

  /* Input Scalars */
  double	  alpha_i = alpha;
  double	  beta_i = beta;

  /* Temporary Floating-Point Variables */
  float		  a_elem;
  double	  b_elem;
  double	  c_elem;
  double	  prod;
  double	  sum;
  double	  tmp1;
  double	  tmp2;



  /* Test for error conditions */
  if (m < 0)
    BLAS_error(routine_name, -4, m, NULL);
  if (n < 0)
    BLAS_error(routine_name, -5, n, NULL);
  if (k < 0)
    BLAS_error(routine_name, -6, k, NULL);

  if (order == blas_colmajor) {

    if (ldc < m)
      BLAS_error(routine_name, -14, ldc, NULL);

    if (transa == blas_no_trans) {
      if (lda < m)
	BLAS_error(routine_name, -9, lda, NULL);
    } else {
      if (lda < k)
	BLAS_error(routine_name, -9, lda, NULL);
    }

    if (transb == blas_no_trans) {
      if (ldb < k)
	BLAS_error(routine_name, -11, ldb, NULL);
    } else {
      if (ldb < n)
	BLAS_error(routine_name, -11, ldb, NULL);
    }

  } else {
    /* row major */
    if (ldc < n)
      BLAS_error(routine_name, -14, ldc, NULL);

    if (transa == blas_no_trans) {
      if (lda < k)
	BLAS_error(routine_name, -9, lda, NULL);
    } else {
      if (lda < m)
	BLAS_error(routine_name, -9, lda, NULL);
    }

    if (transb == blas_no_trans) {
      if (ldb < n)
	BLAS_error(routine_name, -11, ldb, NULL);
    } else {
      if (ldb < k)
	BLAS_error(routine_name, -11, ldb, NULL);
    }
  }

  /* Test for no-op */
  if (n == 0 || m == 0 || k == 0)
    return;
  if (alpha_i == 0.0 && beta_i == 1.0) {
    return;
  }
  /* Set Index Parameters */
  if (order == blas_colmajor) {
    incci = 1;
    inccij = ldc;

    if (transa == blas_no_trans) {
      incai = 1;
      incaih = lda;
    } else {
      incai = lda;
      incaih = 1;
    }

    if (transb == blas_no_trans) {
      incbj = ldb;
      incbhj = 1;
    } else {
      incbj = 1;
      incbhj = ldb;
    }

  } else {
    /* row major */
    incci = ldc;
    inccij = 1;

    if (transa == blas_no_trans) {
      incai = lda;
      incaih = 1;
    } else {
      incai = 1;
      incaih = lda;
    }

    if (transb == blas_no_trans) {
      incbj = 1;
      incbhj = ldb;
    } else {
      incbj = ldb;
      incbhj = 1;
    }

  }



  /* Ajustment to increments */







  /* alpha = 0.  In this case, just return beta * C */
  if (alpha_i == 0.0) {

    ci = 0;
    for (i = 0; i < m; i++, ci += incci) {
      cij = ci;
      for (j = 0; j < n; j++, cij += inccij) {
	c_elem = c_i[cij];
	tmp1 = c_elem * beta_i;
	c_i[cij] = tmp1;
      }
    }

  } else if (alpha_i == 1.0) {

    /* Case alpha == 1. */

    if (beta_i == 0.0) {
      /* Case alpha == 1, beta == 0.   We compute  C <--- A * B */

      ci = 0;
      ai = 0;
      for (i = 0; i < m; i++, ci += incci, ai += incai) {

	cij = ci;
	bj = 0;

	for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	  aih = ai;
	  bhj = bj;

	  sum = 0.0;

	  for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
	    a_elem = a_i[aih];
	    b_elem = b_i[bhj];
	    if (transa == blas_conj_trans) {

	    }
	    if (transb == blas_conj_trans) {

	    }
	    prod = a_elem * b_elem;
	    sum = sum + prod;
	  }
	  c_i[cij] = sum;
	}
      }

    } else {
      /*
       * Case alpha == 1, but beta != 0. We compute   C <--- A * B + beta * C
       */

      ci = 0;
      ai = 0;
      for (i = 0; i < m; i++, ci += incci, ai += incai) {

	cij = ci;
	bj = 0;

	for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	  aih = ai;
	  bhj = bj;

	  sum = 0.0;

	  for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
	    a_elem = a_i[aih];
	    b_elem = b_i[bhj];
	    if (transa == blas_conj_trans) {

	    }
	    if (transb == blas_conj_trans) {

	    }
	    prod = a_elem * b_elem;
	    sum = sum + prod;
	  }

	  c_elem = c_i[cij];
	  tmp2 = c_elem * beta_i;
	  tmp1 = sum;
	  tmp1 = tmp2 + tmp1;
	  c_i[cij] = tmp1;
	}
      }
    }

  } else {

    /* The most general form,   C <-- alpha * A * B + beta * C  */
    ci = 0;
    ai = 0;
    for (i = 0; i < m; i++, ci += incci, ai += incai) {

      cij = ci;
      bj = 0;

      for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	aih = ai;
	bhj = bj;

	sum = 0.0;

	for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
	  a_elem = a_i[aih];
	  b_elem = b_i[bhj];
	  if (transa == blas_conj_trans) {

	  }
	  if (transb == blas_conj_trans) {

	  }
	  prod = a_elem * b_elem;
	  sum = sum + prod;
	}

	tmp1 = sum * alpha_i;
	c_elem = c_i[cij];
	tmp2 = c_elem * beta_i;
	tmp1 = tmp1 + tmp2;
	c_i[cij] = tmp1;
      }
    }

  }



}
Exemplo n.º 22
0
/*
 * Purpose
 * =======
 *
 * Computes y = alpha * ap * x + beta * y, where ap is a symmetric
 * packed matrix.
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of ap; row or column major
 *
 * uplo         (input) blas_uplo_type
 *              Whether ap is upper or lower
 *
 * n            (input) int
 *              Dimension of ap and the length of vector x
 *
 * alpha        (input) double
 *
 * ap           (input) float*
 *
 * x            (input) double*
 *
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) double
 *
 * y            (input/output) double*
 *
 * incy         (input) int
 *              The stride for vector y.
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 *
 */
void		BLAS_dspmv_s_d_x(enum blas_order_type order, enum blas_uplo_type uplo,
	   		int		n       , double alpha, const float *ap,
		  		const		double *x, int incx, double beta,
	       		double       *y, int incy, enum blas_prec_type prec)
{
  static const char routine_name[] = "BLAS_dspmv_s_d_x";

  switch (prec) {
  case blas_prec_single:
  case blas_prec_indigenous:
  case blas_prec_double:{
      {
	int		matrix_row, step, ap_index, ap_start, x_index, x_start;
	int		y_start   , y_index, incap;
	double		alpha_i = alpha;
	double		beta_i = beta;

	const float    *ap_i = ap;
	const double   *x_i = x;
	double         *y_i = y;
	double		rowsum;
	double		rowtmp;
	float		matval;
	double		vecval;
	double		resval;
	double		tmp1;
	double		tmp2;


	incap = 1;




	if (incx < 0)
	  x_start = (-n + 1) * incx;
	else
	  x_start = 0;
	if (incy < 0)
	  y_start = (-n + 1) * incy;
	else
	  y_start = 0;

	if (n < 1) {
	  return;
	}
	if (alpha_i == 0.0 && beta_i == 1.0) {
	  return;
	}
	/* Check for error conditions. */
	if (order != blas_colmajor && order != blas_rowmajor) {
	  BLAS_error(routine_name, -1, order, NULL);
	}
	if (uplo != blas_upper && uplo != blas_lower) {
	  BLAS_error(routine_name, -2, uplo, NULL);
	}
	if (incx == 0) {
	  BLAS_error(routine_name, -7, incx, NULL);
	}
	if (incy == 0) {
	  BLAS_error(routine_name, -10, incy, NULL);
	}
	if (alpha_i == 0.0) {
	  {
	    y_index = y_start;
	    for (matrix_row = 0; matrix_row < n; matrix_row++) {
	      resval = y_i[y_index];

	      tmp2 = beta_i * resval;

	      y_i[y_index] = tmp2;

	      y_index += incy;
	    }
	  }
	} else {
	  if (uplo == blas_lower)
	    order = (order == blas_rowmajor) ? blas_colmajor : blas_rowmajor;
	  if (order == blas_rowmajor) {
	    if (alpha_i == 1.0) {
	      if (beta_i == 0.0) {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    rowsum = 0.0;
		    rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += (n - step - 1) * incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += incap;
		      x_index += incx;
		    }
		    tmp1 = rowsum;
		    y_i[y_index] = tmp1;

		    y_index += incy;
		    ap_start += incap;
		  }
		}
	      } else {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    rowsum = 0.0;
		    rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += (n - step - 1) * incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += incap;
		      x_index += incx;
		    }
		    resval = y_i[y_index];
		    tmp1 = rowsum;
		    tmp2 = beta_i * resval;
		    tmp2 = tmp1 + tmp2;
		    y_i[y_index] = tmp2;

		    y_index += incy;
		    ap_start += incap;
		  }
		}
	      }
	    } else {
	      if (beta_i == 0.0) {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    rowsum = 0.0;
		    rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += (n - step - 1) * incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += incap;
		      x_index += incx;
		    }
		    tmp1 = rowsum * alpha_i;
		    y_i[y_index] = tmp1;

		    y_index += incy;
		    ap_start += incap;
		  }
		}
	      } else {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    rowsum = 0.0;
		    rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += (n - step - 1) * incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += incap;
		      x_index += incx;
		    }
		    resval = y_i[y_index];
		    tmp1 = rowsum * alpha_i;
		    tmp2 = beta_i * resval;
		    tmp2 = tmp1 + tmp2;
		    y_i[y_index] = tmp2;

		    y_index += incy;
		    ap_start += incap;
		  }
		}
	      }
	    }
	  } else {
	    if (alpha_i == 1.0) {
	      if (beta_i == 0.0) {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    rowsum = 0.0;
		    rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += (step + 1) * incap;
		      x_index += incx;
		    }
		    tmp1 = rowsum;
		    y_i[y_index] = tmp1;

		    y_index += incy;
		    ap_start += (matrix_row + 1) * incap;
		  }
		}
	      } else {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    rowsum = 0.0;
		    rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += (step + 1) * incap;
		      x_index += incx;
		    }
		    resval = y_i[y_index];
		    tmp1 = rowsum;
		    tmp2 = beta_i * resval;
		    tmp2 = tmp1 + tmp2;
		    y_i[y_index] = tmp2;

		    y_index += incy;
		    ap_start += (matrix_row + 1) * incap;
		  }
		}
	      }
	    } else {
	      if (beta_i == 0.0) {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    rowsum = 0.0;
		    rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += (step + 1) * incap;
		      x_index += incx;
		    }
		    tmp1 = rowsum * alpha_i;
		    y_i[y_index] = tmp1;

		    y_index += incy;
		    ap_start += (matrix_row + 1) * incap;
		  }
		}
	      } else {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    rowsum = 0.0;
		    rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      rowtmp = matval * vecval;
		      rowsum = rowsum + rowtmp;
		      ap_index += (step + 1) * incap;
		      x_index += incx;
		    }
		    resval = y_i[y_index];
		    tmp1 = rowsum * alpha_i;
		    tmp2 = beta_i * resval;
		    tmp2 = tmp1 + tmp2;
		    y_i[y_index] = tmp2;

		    y_index += incy;
		    ap_start += (matrix_row + 1) * incap;
		  }
		}
	      }
	    }
	  }			/* if order == ... */
	}			/* alpha != 0 */


      }
      break;
    }

  case blas_prec_extra:{
      {
	int		matrix_row, step, ap_index, ap_start, x_index, x_start;
	int		y_start   , y_index, incap;
	double		alpha_i = alpha;
	double		beta_i = beta;

	const float    *ap_i = ap;
	const double   *x_i = x;
	double         *y_i = y;
	double		head_rowsum, tail_rowsum;
	double		head_rowtmp, tail_rowtmp;
	float		matval;
	double		vecval;
	double		resval;
	double		head_tmp1, tail_tmp1;
	double		head_tmp2, tail_tmp2;
	FPU_FIX_DECL;

	incap = 1;




	if (incx < 0)
	  x_start = (-n + 1) * incx;
	else
	  x_start = 0;
	if (incy < 0)
	  y_start = (-n + 1) * incy;
	else
	  y_start = 0;

	if (n < 1) {
	  return;
	}
	if (alpha_i == 0.0 && beta_i == 1.0) {
	  return;
	}
	/* Check for error conditions. */
	if (order != blas_colmajor && order != blas_rowmajor) {
	  BLAS_error(routine_name, -1, order, NULL);
	}
	if (uplo != blas_upper && uplo != blas_lower) {
	  BLAS_error(routine_name, -2, uplo, NULL);
	}
	if (incx == 0) {
	  BLAS_error(routine_name, -7, incx, NULL);
	}
	if (incy == 0) {
	  BLAS_error(routine_name, -10, incy, NULL);
	}
	FPU_FIX_START;

	if (alpha_i == 0.0) {
	  {
	    y_index = y_start;
	    for (matrix_row = 0; matrix_row < n; matrix_row++) {
	      resval = y_i[y_index];

	      {
		/* Compute double_double = double * double. */
		double		a1     , a2, b1, b2, con;

		con = beta_i * split;
		a1 = con - beta_i;
		a1 = con - a1;
		a2 = beta_i - a1;
		con = resval * split;
		b1 = con - resval;
		b1 = con - b1;
		b2 = resval - b1;

		head_tmp2 = beta_i * resval;
		tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
	      }

	      y_i[y_index] = head_tmp2;

	      y_index += incy;
	    }
	  }
	} else {
	  if (uplo == blas_lower)
	    order = (order == blas_rowmajor) ? blas_colmajor : blas_rowmajor;
	  if (order == blas_rowmajor) {
	    if (alpha_i == 1.0) {
	      if (beta_i == 0.0) {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    head_rowsum = tail_rowsum = 0.0;
		    head_rowtmp = tail_rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += (n - step - 1) * incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += incap;
		      x_index += incx;
		    }
		    head_tmp1 = head_rowsum;
		    tail_tmp1 = tail_rowsum;
		    y_i[y_index] = head_tmp1;

		    y_index += incy;
		    ap_start += incap;
		  }
		}
	      } else {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    head_rowsum = tail_rowsum = 0.0;
		    head_rowtmp = tail_rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += (n - step - 1) * incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += incap;
		      x_index += incx;
		    }
		    resval = y_i[y_index];
		    head_tmp1 = head_rowsum;
		    tail_tmp1 = tail_rowsum;
		    {
		      /* Compute double_double = double * double. */
		      double	      a1, a2, b1, b2, con;

		      con = beta_i * split;
		      a1 = con - beta_i;
		      a1 = con - a1;
		      a2 = beta_i - a1;
		      con = resval * split;
		      b1 = con - resval;
		      b1 = con - b1;
		      b2 = resval - b1;

		      head_tmp2 = beta_i * resval;
		      tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
		    }
		    {
		      /*
		       * Compute double-double = double-double +
		       * double-double.
		       */
		      double	      bv;
		      double	      s1, s2, t1, t2;

		      /* Add two hi words. */
		      s1 = head_tmp1 + head_tmp2;
		      bv = s1 - head_tmp1;
		      s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

		      /* Add two lo words. */
		      t1 = tail_tmp1 + tail_tmp2;
		      bv = t1 - tail_tmp1;
		      t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

		      s2 += t1;

		      /* Renormalize (s1, s2)  to  (t1, s2) */
		      t1 = s1 + s2;
		      s2 = s2 - (t1 - s1);

		      t2 += s2;

		      /* Renormalize (t1, t2)  */
		      head_tmp2 = t1 + t2;
		      tail_tmp2 = t2 - (head_tmp2 - t1);
		    }
		    y_i[y_index] = head_tmp2;

		    y_index += incy;
		    ap_start += incap;
		  }
		}
	      }
	    } else {
	      if (beta_i == 0.0) {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    head_rowsum = tail_rowsum = 0.0;
		    head_rowtmp = tail_rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += (n - step - 1) * incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += incap;
		      x_index += incx;
		    }
		    {
		      /* Compute double-double = double-double * double. */
		      double	      a11, a21, b1, b2, c11, c21, c2, con,
		      		      t1     , t2;

		      con = head_rowsum * split;
		      a11 = con - head_rowsum;
		      a11 = con - a11;
		      a21 = head_rowsum - a11;
		      con = alpha_i * split;
		      b1 = con - alpha_i;
		      b1 = con - b1;
		      b2 = alpha_i - b1;

		      c11 = head_rowsum * alpha_i;
		      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		      c2 = tail_rowsum * alpha_i;
		      t1 = c11 + c2;
		      t2 = (c2 - (t1 - c11)) + c21;

		      head_tmp1 = t1 + t2;
		      tail_tmp1 = t2 - (head_tmp1 - t1);
		    }
		    y_i[y_index] = head_tmp1;

		    y_index += incy;
		    ap_start += incap;
		  }
		}
	      } else {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    head_rowsum = tail_rowsum = 0.0;
		    head_rowtmp = tail_rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += (n - step - 1) * incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += incap;
		      x_index += incx;
		    }
		    resval = y_i[y_index];
		    {
		      /* Compute double-double = double-double * double. */
		      double	      a11, a21, b1, b2, c11, c21, c2, con,
		      		      t1     , t2;

		      con = head_rowsum * split;
		      a11 = con - head_rowsum;
		      a11 = con - a11;
		      a21 = head_rowsum - a11;
		      con = alpha_i * split;
		      b1 = con - alpha_i;
		      b1 = con - b1;
		      b2 = alpha_i - b1;

		      c11 = head_rowsum * alpha_i;
		      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		      c2 = tail_rowsum * alpha_i;
		      t1 = c11 + c2;
		      t2 = (c2 - (t1 - c11)) + c21;

		      head_tmp1 = t1 + t2;
		      tail_tmp1 = t2 - (head_tmp1 - t1);
		    }
		    {
		      /* Compute double_double = double * double. */
		      double	      a1, a2, b1, b2, con;

		      con = beta_i * split;
		      a1 = con - beta_i;
		      a1 = con - a1;
		      a2 = beta_i - a1;
		      con = resval * split;
		      b1 = con - resval;
		      b1 = con - b1;
		      b2 = resval - b1;

		      head_tmp2 = beta_i * resval;
		      tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
		    }
		    {
		      /*
		       * Compute double-double = double-double +
		       * double-double.
		       */
		      double	      bv;
		      double	      s1, s2, t1, t2;

		      /* Add two hi words. */
		      s1 = head_tmp1 + head_tmp2;
		      bv = s1 - head_tmp1;
		      s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

		      /* Add two lo words. */
		      t1 = tail_tmp1 + tail_tmp2;
		      bv = t1 - tail_tmp1;
		      t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

		      s2 += t1;

		      /* Renormalize (s1, s2)  to  (t1, s2) */
		      t1 = s1 + s2;
		      s2 = s2 - (t1 - s1);

		      t2 += s2;

		      /* Renormalize (t1, t2)  */
		      head_tmp2 = t1 + t2;
		      tail_tmp2 = t2 - (head_tmp2 - t1);
		    }
		    y_i[y_index] = head_tmp2;

		    y_index += incy;
		    ap_start += incap;
		  }
		}
	      }
	    }
	  } else {
	    if (alpha_i == 1.0) {
	      if (beta_i == 0.0) {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    head_rowsum = tail_rowsum = 0.0;
		    head_rowtmp = tail_rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += (step + 1) * incap;
		      x_index += incx;
		    }
		    head_tmp1 = head_rowsum;
		    tail_tmp1 = tail_rowsum;
		    y_i[y_index] = head_tmp1;

		    y_index += incy;
		    ap_start += (matrix_row + 1) * incap;
		  }
		}
	      } else {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    head_rowsum = tail_rowsum = 0.0;
		    head_rowtmp = tail_rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += (step + 1) * incap;
		      x_index += incx;
		    }
		    resval = y_i[y_index];
		    head_tmp1 = head_rowsum;
		    tail_tmp1 = tail_rowsum;
		    {
		      /* Compute double_double = double * double. */
		      double	      a1, a2, b1, b2, con;

		      con = beta_i * split;
		      a1 = con - beta_i;
		      a1 = con - a1;
		      a2 = beta_i - a1;
		      con = resval * split;
		      b1 = con - resval;
		      b1 = con - b1;
		      b2 = resval - b1;

		      head_tmp2 = beta_i * resval;
		      tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
		    }
		    {
		      /*
		       * Compute double-double = double-double +
		       * double-double.
		       */
		      double	      bv;
		      double	      s1, s2, t1, t2;

		      /* Add two hi words. */
		      s1 = head_tmp1 + head_tmp2;
		      bv = s1 - head_tmp1;
		      s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

		      /* Add two lo words. */
		      t1 = tail_tmp1 + tail_tmp2;
		      bv = t1 - tail_tmp1;
		      t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

		      s2 += t1;

		      /* Renormalize (s1, s2)  to  (t1, s2) */
		      t1 = s1 + s2;
		      s2 = s2 - (t1 - s1);

		      t2 += s2;

		      /* Renormalize (t1, t2)  */
		      head_tmp2 = t1 + t2;
		      tail_tmp2 = t2 - (head_tmp2 - t1);
		    }
		    y_i[y_index] = head_tmp2;

		    y_index += incy;
		    ap_start += (matrix_row + 1) * incap;
		  }
		}
	      }
	    } else {
	      if (beta_i == 0.0) {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    head_rowsum = tail_rowsum = 0.0;
		    head_rowtmp = tail_rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += (step + 1) * incap;
		      x_index += incx;
		    }
		    {
		      /* Compute double-double = double-double * double. */
		      double	      a11, a21, b1, b2, c11, c21, c2, con,
		      		      t1     , t2;

		      con = head_rowsum * split;
		      a11 = con - head_rowsum;
		      a11 = con - a11;
		      a21 = head_rowsum - a11;
		      con = alpha_i * split;
		      b1 = con - alpha_i;
		      b1 = con - b1;
		      b2 = alpha_i - b1;

		      c11 = head_rowsum * alpha_i;
		      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		      c2 = tail_rowsum * alpha_i;
		      t1 = c11 + c2;
		      t2 = (c2 - (t1 - c11)) + c21;

		      head_tmp1 = t1 + t2;
		      tail_tmp1 = t2 - (head_tmp1 - t1);
		    }
		    y_i[y_index] = head_tmp1;

		    y_index += incy;
		    ap_start += (matrix_row + 1) * incap;
		  }
		}
	      } else {
		{
		  y_index = y_start;
		  ap_start = 0;
		  for (matrix_row = 0; matrix_row < n; matrix_row++) {
		    x_index = x_start;
		    ap_index = ap_start;
		    head_rowsum = tail_rowsum = 0.0;
		    head_rowtmp = tail_rowtmp = 0.0;
		    for (step = 0; step < matrix_row; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += incap;
		      x_index += incx;
		    }
		    for (step = matrix_row; step < n; step++) {
		      matval = ap_i[ap_index];
		      vecval = x_i[x_index];
		      {
			double		dt = (double)matval;
			{
			  /* Compute double_double = double * double. */
			  double	  a1    , a2, b1, b2, con;

			  con = dt * split;
			  a1 = con - dt;
			  a1 = con - a1;
			  a2 = dt - a1;
			  con = vecval * split;
			  b1 = con - vecval;
			  b1 = con - b1;
			  b2 = vecval - b1;

			  head_rowtmp = dt * vecval;
			  tail_rowtmp = (((a1 * b1 - head_rowtmp) + a1 * b2) + a2 * b1) + a2 * b2;
			}
		      }
		      {
			/*
			 * Compute double-double = double-double +
			 * double-double.
			 */
			double		bv;
			double		s1     , s2, t1, t2;

			/* Add two hi words. */
			s1 = head_rowsum + head_rowtmp;
			bv = s1 - head_rowsum;
			s2 = ((head_rowtmp - bv) + (head_rowsum - (s1 - bv)));

			/* Add two lo words. */
			t1 = tail_rowsum + tail_rowtmp;
			bv = t1 - tail_rowsum;
			t2 = ((tail_rowtmp - bv) + (tail_rowsum - (t1 - bv)));

			s2 += t1;

			/* Renormalize (s1, s2)  to  (t1, s2) */
			t1 = s1 + s2;
			s2 = s2 - (t1 - s1);

			t2 += s2;

			/* Renormalize (t1, t2)  */
			head_rowsum = t1 + t2;
			tail_rowsum = t2 - (head_rowsum - t1);
		      }
		      ap_index += (step + 1) * incap;
		      x_index += incx;
		    }
		    resval = y_i[y_index];
		    {
		      /* Compute double-double = double-double * double. */
		      double	      a11, a21, b1, b2, c11, c21, c2, con,
		      		      t1     , t2;

		      con = head_rowsum * split;
		      a11 = con - head_rowsum;
		      a11 = con - a11;
		      a21 = head_rowsum - a11;
		      con = alpha_i * split;
		      b1 = con - alpha_i;
		      b1 = con - b1;
		      b2 = alpha_i - b1;

		      c11 = head_rowsum * alpha_i;
		      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		      c2 = tail_rowsum * alpha_i;
		      t1 = c11 + c2;
		      t2 = (c2 - (t1 - c11)) + c21;

		      head_tmp1 = t1 + t2;
		      tail_tmp1 = t2 - (head_tmp1 - t1);
		    }
		    {
		      /* Compute double_double = double * double. */
		      double	      a1, a2, b1, b2, con;

		      con = beta_i * split;
		      a1 = con - beta_i;
		      a1 = con - a1;
		      a2 = beta_i - a1;
		      con = resval * split;
		      b1 = con - resval;
		      b1 = con - b1;
		      b2 = resval - b1;

		      head_tmp2 = beta_i * resval;
		      tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
		    }
		    {
		      /*
		       * Compute double-double = double-double +
		       * double-double.
		       */
		      double	      bv;
		      double	      s1, s2, t1, t2;

		      /* Add two hi words. */
		      s1 = head_tmp1 + head_tmp2;
		      bv = s1 - head_tmp1;
		      s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

		      /* Add two lo words. */
		      t1 = tail_tmp1 + tail_tmp2;
		      bv = t1 - tail_tmp1;
		      t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

		      s2 += t1;

		      /* Renormalize (s1, s2)  to  (t1, s2) */
		      t1 = s1 + s2;
		      s2 = s2 - (t1 - s1);

		      t2 += s2;

		      /* Renormalize (t1, t2)  */
		      head_tmp2 = t1 + t2;
		      tail_tmp2 = t2 - (head_tmp2 - t1);
		    }
		    y_i[y_index] = head_tmp2;

		    y_index += incy;
		    ap_start += (matrix_row + 1) * incap;
		  }
		}
	      }
	    }
	  }			/* if order == ... */
	}			/* alpha != 0 */

	FPU_FIX_STOP;
      }
      break;
    }

  }
}
Exemplo n.º 23
0
void		BLAS_dtbsv_s(enum blas_order_type order, enum blas_uplo_type uplo,
		   		enum		blas_trans_type trans, enum blas_diag_type diag,
		   		int		n       , int k, double alpha, const float *t, int ldt,
		   		double       *x, int incx)
/*
 * Purpose
 * =======
 *
 * This routine solves :
 *
 *     x <- alpha * inverse(t) * x
 *
 * Arguments
 * =========
 *
 * order  (input) enum blas_order_type
 *        column major, row major (blas_rowmajor, blas_colmajor)
 *
 * uplo   (input) enum blas_uplo_type
 *        upper, lower (blas_upper, blas_lower)
 *
 * trans  (input) enum blas_trans_type
 *        no trans, trans, conj trans
 *
 * diag   (input) enum blas_diag_type
 *        unit, non unit (blas_unit_diag, blas_non_unit_diag)
 *
 * n      (input) int
 *        the dimension of t
 *
 * k      (input) int
 *        the number of subdiagonals/superdiagonals of t
 *
 * alpha  (input) double
 *
 * t      (input) float*
 *        Triangular Banded matrix
 *
 * x      (input) const double*
 *           Array of length n.
 *
 * incx   (input) int
 *           The stride used to access components x[i].
 *
 */
{
  /* Routine name */
  static const char routine_name[] = "BLAS_dtbsv_s";

  int		  i       , j;	/* used to keep track of loop counts */
  int		  xi;		/* used to index vector x */
  int		  start_xi;	/* used as the starting idx to vector x */
  int		  incxi;
  int		  Tij;		/* index inside of Banded structure */
  int		  dot_start, dot_start_inc1, dot_start_inc2, dot_inc;

  const float    *t_i = t;	/* internal matrix t */
  double         *x_i = x;	/* internal x */
  double	  alpha_i = alpha;	/* internal alpha */

  if (order != blas_rowmajor && order != blas_colmajor) {
    BLAS_error(routine_name, -1, order, 0);
  }
  if (uplo != blas_upper && uplo != blas_lower) {
    BLAS_error(routine_name, -2, uplo, 0);
  }
  if ((trans != blas_trans) && (trans != blas_no_trans) &&
      (trans != blas_conj) && (trans != blas_conj_trans)) {
    BLAS_error(routine_name, -2, uplo, 0);
  }
  if (diag != blas_non_unit_diag && diag != blas_unit_diag) {
    BLAS_error(routine_name, -4, diag, 0);
  }
  if (n < 0) {
    BLAS_error(routine_name, -5, n, 0);
  }
  if (k >= n) {
    BLAS_error(routine_name, -6, k, 0);
  }
  if ((ldt < 1) || (ldt <= k)) {
    BLAS_error(routine_name, -9, ldt, 0);
  }
  if (incx == 0) {
    BLAS_error(routine_name, -11, incx, 0);
  }
  if (n <= 0)
    return;

  incxi = incx;


  /* configuring the vector starting idx */
  if (incxi < 0) {
    start_xi = (1 - n) * incxi;
  } else {
    start_xi = 0;
  }

  /* if alpha is zero, then return x as a zero vector */
  if (alpha_i == 0.0) {
    xi = start_xi;
    for (i = 0; i < n; i++) {
      x_i[xi] = 0.0;
      xi += incxi;
    }
    return;
  }
  /* check to see if k=0.  if so, we can optimize somewhat */
  if (k == 0) {
    if ((alpha_i == 1.0) && (diag == blas_unit_diag)) {
      /* nothing to do */
      return;
    } else {
      /* just run the loops as is. */

    }
  }
  /* get index variables prepared */
  if (((trans == blas_trans) || (trans == blas_conj_trans)) ^
      (order == blas_rowmajor)) {
    dot_start = k;
  } else {
    dot_start = 0;
  }

  if (((trans == blas_trans) || (trans == blas_conj_trans)) ^
      (order == blas_rowmajor)) {
    dot_inc = 1;
    dot_start_inc1 = ldt - 1;
    dot_start_inc2 = ldt;
  } else {
    dot_inc = ldt - 1;
    dot_start_inc1 = 1;
    dot_start_inc2 = ldt;
  }

  if (((trans == blas_trans) || (trans == blas_conj_trans)) ^
      (uplo == blas_lower)) {
    /* start at the first element of x */
    /* substitution will proceed forwards (forwardsubstitution) */
  } else {
    /* start at the last element of x */
    /* substitution will proceed backwards (backsubstitution) */
    dot_inc = -dot_inc;
    dot_start_inc1 = -dot_start_inc1;
    dot_start_inc2 = -dot_start_inc2;
    dot_start = ldt * (n - 1) + k - dot_start;
    /* order of the following 2 statements matters! */
    start_xi = start_xi + (n - 1) * incxi;
    incxi = -incxi;
  }







  {

    {
      double	      temp1;	/* temporary variable for calculations */
      double	      temp2;	/* temporary variable for calculations */
      double	      x_elem;
      float	      T_element;






      /* loop 1 */
      xi = start_xi;
      for (j = 0; j < k; j++) {

	/* each time through loop, xi lands on next x to compute. */
	x_elem = x_i[xi];
	/*
	 * preform the multiplication - in this implementation we do not
	 * seperate the alpha = 1 case
	 */
	temp1 = x_elem * alpha_i;

	xi = start_xi;

	Tij = dot_start;
	dot_start += dot_start_inc1;

	for (i = j; i > 0; i--) {
	  T_element = t_i[Tij];

	  x_elem = x_i[xi];
	  temp2 = x_elem * T_element;
	  temp1 = temp1 + (-temp2);
	  xi += incxi;
	  Tij += dot_inc;
	}			/* for across row */


	/*
	 * if the diagonal entry is not equal to one, then divide Xj by the
	 * entry
	 */
	if (diag == blas_non_unit_diag) {
	  T_element = t_i[Tij];


	  temp1 = temp1 / T_element;

	}			/* if (diag == blas_non_unit_diag) */
	x_i[xi] = temp1;
	xi += incxi;
      }				/* for j<k */
      /* end loop 1 */

      /* loop 2 continue without changing j to start */
      for (; j < n; j++) {

	/* each time through loop, xi lands on next x to compute. */
	x_elem = x_i[xi];
	temp1 = x_elem * alpha_i;

	xi = start_xi;
	start_xi += incxi;

	Tij = dot_start;
	dot_start += dot_start_inc2;

	for (i = k; i > 0; i--) {
	  T_element = t_i[Tij];

	  x_elem = x_i[xi];
	  temp2 = x_elem * T_element;
	  temp1 = temp1 + (-temp2);
	  xi += incxi;
	  Tij += dot_inc;
	}			/* for across row */


	/*
	 * if the diagonal entry is not equal to one, then divide by the
	 * entry
	 */
	if (diag == blas_non_unit_diag) {
	  T_element = t_i[Tij];


	  temp1 = temp1 / T_element;

	}			/* if (diag == blas_non_unit_diag) */
	x_i[xi] = temp1;
	xi += incxi;
      }				/* for j<n */


    }
  }
}				/* end BLAS_dtbsv_s */
Exemplo n.º 24
0
void BLAS_dtrmv_x(enum blas_order_type order, enum blas_uplo_type uplo,
		  enum blas_trans_type trans, enum blas_diag_type diag, int n,
		  double alpha, const double *T, int ldt,
		  double *x, int incx, enum blas_prec_type prec)

/*
 * Purpose
 * =======
 *
 * Computes x <-- alpha * T * x, where T is a triangular matrix.
 *
 * Arguments
 * =========
 * 
 * order  (input) enum blas_order_type
 *        column major, row major
 *
 * uplo   (input) enum blas_uplo_type
 *        upper, lower
 *
 * trans  (input) enum blas_trans_type
 *        no trans, trans, conj trans
 * 
 * diag   (input) enum blas_diag_type
 *        unit, non unit
 *
 * n      (input) int
 *        the dimension of T
 * 
 * alpha  (input) double
 * 
 * T      (input) double*
 *        Triangular matrix
 *
 * ldt    (input) int 
 *        Leading dimension of T
 *
 * x      (input) const double*
 *    Array of length n.
 * 
 * incx   (input) int
 *     The stride used to access components x[i].
 *
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 */
{
  static const char routine_name[] = "BLAS_dtrmv_x";

  switch (prec) {
  case blas_prec_single:
  case blas_prec_double:
  case blas_prec_indigenous:{

      int i, j;			/* used to idx matrix */
      int xj, xj0;
      int ti, tij, tij0;

      int inc_ti, inc_tij;
      int inc_x;

      const double *T_i = T;	/* internal matrix T */
      double *x_i = x;		/* internal x */
      double alpha_i = alpha;	/* internal alpha */

      double t_elem;
      double x_elem;
      double prod;
      double sum;
      double tmp;



      /* all error calls */
      if ((order != blas_rowmajor && order != blas_colmajor) ||
	  (uplo != blas_upper && uplo != blas_lower) ||
	  (trans != blas_trans &&
	   trans != blas_no_trans &&
	   trans != blas_conj_trans) ||
	  (diag != blas_non_unit_diag && diag != blas_unit_diag) ||
	  (ldt < n) || (incx == 0)) {
	BLAS_error(routine_name, 0, 0, NULL);
      } else if (n <= 0) {
	BLAS_error(routine_name, -4, n, NULL);
      } else if (incx == 0) {
	BLAS_error(routine_name, -9, incx, NULL);
      }

      if (trans == blas_no_trans) {
	if (uplo == blas_upper) {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = ldt;
	    inc_tij = -1;
	  } else {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  }
	} else {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  } else {
	    inc_ti = -1;
	    inc_tij = ldt;
	  }
	}
      } else {
	if (uplo == blas_upper) {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -1;
	    inc_tij = ldt;
	  } else {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  }
	} else {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  } else {
	    inc_ti = ldt;
	    inc_tij = -1;
	  }
	}
      }





      xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x);
      if (alpha_i == 0.0) {
	xj = xj0;
	for (j = 0; j < n; j++) {
	  x_i[xj] = 0.0;
	  xj += inc_x;
	}
      } else {

	if (diag == blas_unit_diag) {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    sum = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < (n - 1); j++) {

	      t_elem = T_i[tij];

	      x_elem = x_i[xj];
	      prod = x_elem * t_elem;
	      sum = sum + prod;

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    x_elem = x_i[xj];
	    sum = sum + x_elem;

	    if (alpha_i == 1.0) {
	      x_i[xj] = sum;
	    } else {
	      tmp = sum * alpha_i;
	      x_i[xj] = tmp;
	    }

	    ti += inc_ti;
	  }

	} else {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    sum = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < n; j++) {

	      t_elem = T_i[tij];

	      x_elem = x_i[xj];
	      prod = x_elem * t_elem;
	      sum = sum + prod;

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    if (alpha_i == 1.0) {
	      x_i[xj - inc_x] = sum;
	    } else {
	      tmp = sum * alpha_i;
	      x_i[xj - inc_x] = tmp;
	    }

	    ti += inc_ti;
	  }

	}

      }



      break;
    }

  case blas_prec_extra:{

      int i, j;			/* used to idx matrix */
      int xj, xj0;
      int ti, tij, tij0;

      int inc_ti, inc_tij;
      int inc_x;

      const double *T_i = T;	/* internal matrix T */
      double *x_i = x;		/* internal x */
      double alpha_i = alpha;	/* internal alpha */

      double t_elem;
      double x_elem;
      double head_prod, tail_prod;
      double head_sum, tail_sum;
      double head_tmp, tail_tmp;

      FPU_FIX_DECL;
      FPU_FIX_START;

      /* all error calls */
      if ((order != blas_rowmajor && order != blas_colmajor) ||
	  (uplo != blas_upper && uplo != blas_lower) ||
	  (trans != blas_trans &&
	   trans != blas_no_trans &&
	   trans != blas_conj_trans) ||
	  (diag != blas_non_unit_diag && diag != blas_unit_diag) ||
	  (ldt < n) || (incx == 0)) {
	BLAS_error(routine_name, 0, 0, NULL);
      } else if (n <= 0) {
	BLAS_error(routine_name, -4, n, NULL);
      } else if (incx == 0) {
	BLAS_error(routine_name, -9, incx, NULL);
      }

      if (trans == blas_no_trans) {
	if (uplo == blas_upper) {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = ldt;
	    inc_tij = -1;
	  } else {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  }
	} else {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  } else {
	    inc_ti = -1;
	    inc_tij = ldt;
	  }
	}
      } else {
	if (uplo == blas_upper) {
	  inc_x = incx;
	  if (order == blas_rowmajor) {
	    inc_ti = -1;
	    inc_tij = ldt;
	  } else {
	    inc_ti = -ldt;
	    inc_tij = 1;
	  }
	} else {
	  inc_x = -incx;
	  if (order == blas_rowmajor) {
	    inc_ti = 1;
	    inc_tij = -ldt;
	  } else {
	    inc_ti = ldt;
	    inc_tij = -1;
	  }
	}
      }





      xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x);
      if (alpha_i == 0.0) {
	xj = xj0;
	for (j = 0; j < n; j++) {
	  x_i[xj] = 0.0;
	  xj += inc_x;
	}
      } else {

	if (diag == blas_unit_diag) {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    head_sum = tail_sum = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < (n - 1); j++) {

	      t_elem = T_i[tij];

	      x_elem = x_i[xj];
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = x_elem * split;
		a1 = con - x_elem;
		a1 = con - a1;
		a2 = x_elem - a1;
		con = t_elem * split;
		b1 = con - t_elem;
		b1 = con - b1;
		b2 = t_elem - b1;

		head_prod = x_elem * t_elem;
		tail_prod =
		  (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_sum + head_prod;
		bv = s1 - head_sum;
		s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_sum + tail_prod;
		bv = t1 - tail_sum;
		t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_sum = t1 + t2;
		tail_sum = t2 - (head_sum - t1);
	      }

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    x_elem = x_i[xj];
	    {
	      /* Compute double-double = double-double + double. */
	      double e, t1, t2;

	      /* Knuth trick. */
	      t1 = head_sum + x_elem;
	      e = t1 - head_sum;
	      t2 = ((x_elem - e) + (head_sum - (t1 - e))) + tail_sum;

	      /* The result is t1 + t2, after normalization. */
	      head_sum = t1 + t2;
	      tail_sum = t2 - (head_sum - t1);
	    }

	    if (alpha_i == 1.0) {
	      x_i[xj] = head_sum;
	    } else {
	      {
		/* Compute double-double = double-double * double. */
		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

		con = head_sum * split;
		a11 = con - head_sum;
		a11 = con - a11;
		a21 = head_sum - a11;
		con = alpha_i * split;
		b1 = con - alpha_i;
		b1 = con - b1;
		b2 = alpha_i - b1;

		c11 = head_sum * alpha_i;
		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		c2 = tail_sum * alpha_i;
		t1 = c11 + c2;
		t2 = (c2 - (t1 - c11)) + c21;

		head_tmp = t1 + t2;
		tail_tmp = t2 - (head_tmp - t1);
	      }
	      x_i[xj] = head_tmp;
	    }

	    ti += inc_ti;
	  }

	} else {


	  ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
	  tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
	  for (i = 0; i < n; i++) {

	    head_sum = tail_sum = 0.0;

	    xj = xj0;
	    tij = ti + tij0;
	    for (j = i; j < n; j++) {

	      t_elem = T_i[tij];

	      x_elem = x_i[xj];
	      {
		/* Compute double_double = double * double. */
		double a1, a2, b1, b2, con;

		con = x_elem * split;
		a1 = con - x_elem;
		a1 = con - a1;
		a2 = x_elem - a1;
		con = t_elem * split;
		b1 = con - t_elem;
		b1 = con - b1;
		b2 = t_elem - b1;

		head_prod = x_elem * t_elem;
		tail_prod =
		  (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double bv;
		double s1, s2, t1, t2;

		/* Add two hi words. */
		s1 = head_sum + head_prod;
		bv = s1 - head_sum;
		s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_sum + tail_prod;
		bv = t1 - tail_sum;
		t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_sum = t1 + t2;
		tail_sum = t2 - (head_sum - t1);
	      }

	      xj += inc_x;
	      tij += inc_tij;
	    }

	    if (alpha_i == 1.0) {
	      x_i[xj - inc_x] = head_sum;
	    } else {
	      {
		/* Compute double-double = double-double * double. */
		double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

		con = head_sum * split;
		a11 = con - head_sum;
		a11 = con - a11;
		a21 = head_sum - a11;
		con = alpha_i * split;
		b1 = con - alpha_i;
		b1 = con - b1;
		b2 = alpha_i - b1;

		c11 = head_sum * alpha_i;
		c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

		c2 = tail_sum * alpha_i;
		t1 = c11 + c2;
		t2 = (c2 - (t1 - c11)) + c21;

		head_tmp = t1 + t2;
		tail_tmp = t2 - (head_tmp - t1);
	      }
	      x_i[xj - inc_x] = head_tmp;
	    }

	    ti += inc_ti;
	  }

	}

      }

      FPU_FIX_STOP;

      break;
    }
  }
}
void		BLAS_dwaxpby_s_s_x(int n, double alpha, const float *x, int incx,
	 		double	beta  , const float *y, int incy, double *w,
	  		int		incw    , enum blas_prec_type prec)
/*
 * Purpose
 * =======
 *
 * This routine computes:
 *
 *     w <- alpha * x + beta * y
 *
 * Arguments
 * =========
 *
 * n     (input) int
 *       The length of vectors x, y, and w.
 *
 * alpha (input) double
 *
 * x     (input) const float*
 *       Array of length n.
 *
 * incx  (input) int
 *       The stride used to access components x[i].
 *
 * beta  (input) double
 *
 * y     (input) float*
 *       Array of length n.
 *
 * incy  (input) int
 *       The stride used to access components y[i].
 *
 * w     (output) double*
 *       Array of length n.
 *
 * incw  (input) int
 *       The stride used to write components w[i].
 *
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 */
{
  char           *routine_name = "BLAS_dwaxpby_s_s_x";
  switch (prec) {
  case blas_prec_single:
  case blas_prec_double:
  case blas_prec_indigenous:{

      int	      i    , ix = 0, iy = 0, iw = 0;
      double         *w_i = w;
      const float    *x_i = x;
      const float    *y_i = y;
      double	      alpha_i = alpha;
      double	      beta_i = beta;
      float	      x_ii;
      float	      y_ii;
      double	      tmpx;
      double	      tmpy;



      /* Test the input parameters. */
      if (incx == 0)
	BLAS_error(routine_name, -4, incx, NULL);
      else if (incy == 0)
	BLAS_error(routine_name, -7, incy, NULL);
      else if (incw == 0)
	BLAS_error(routine_name, -9, incw, NULL);


      /* Immediate return */
      if (n <= 0) {
	return;
      }
      if (incx < 0)
	ix = (-n + 1) * incx;
      if (incy < 0)
	iy = (-n + 1) * incy;
      if (incw < 0)
	iw = (-n + 1) * incw;

      for (i = 0; i < n; ++i) {
	x_ii = x_i[ix];
	y_ii = y_i[iy];
	tmpx = alpha_i * x_ii;	/* tmpx  = alpha * x[ix] */
	tmpy = beta_i * y_ii;	/* tmpy = beta * y[iy] */
	tmpy = tmpy + tmpx;
	w_i[iw] = tmpy;
	ix += incx;
	iy += incy;
	iw += incw;
      }				/* endfor */



      break;
    }

  case blas_prec_extra:{

      int	      i    , ix = 0, iy = 0, iw = 0;
      double         *w_i = w;
      const float    *x_i = x;
      const float    *y_i = y;
      double	      alpha_i = alpha;
      double	      beta_i = beta;
      float	      x_ii;
      float	      y_ii;
      double	      head_tmpx, tail_tmpx;
      double	      head_tmpy, tail_tmpy;

      FPU_FIX_DECL;

      /* Test the input parameters. */
      if (incx == 0)
	BLAS_error(routine_name, -4, incx, NULL);
      else if (incy == 0)
	BLAS_error(routine_name, -7, incy, NULL);
      else if (incw == 0)
	BLAS_error(routine_name, -9, incw, NULL);


      /* Immediate return */
      if (n <= 0) {
	return;
      }
      FPU_FIX_START;




      if (incx < 0)
	ix = (-n + 1) * incx;
      if (incy < 0)
	iy = (-n + 1) * incy;
      if (incw < 0)
	iw = (-n + 1) * incw;

      for (i = 0; i < n; ++i) {
	x_ii = x_i[ix];
	y_ii = y_i[iy];
	{
	  double	  dt = (double)x_ii;
	  {
	    /* Compute double_double = double * double. */
	    double	    a1  , a2, b1, b2, con;

	    con = alpha_i * split;
	    a1 = con - alpha_i;
	    a1 = con - a1;
	    a2 = alpha_i - a1;
	    con = dt * split;
	    b1 = con - dt;
	    b1 = con - b1;
	    b2 = dt - b1;

	    head_tmpx = alpha_i * dt;
	    tail_tmpx = (((a1 * b1 - head_tmpx) + a1 * b2) + a2 * b1) + a2 * b2;
	  }
	}			/* tmpx  = alpha * x[ix] */
	{
	  double	  dt = (double)y_ii;
	  {
	    /* Compute double_double = double * double. */
	    double	    a1  , a2, b1, b2, con;

	    con = beta_i * split;
	    a1 = con - beta_i;
	    a1 = con - a1;
	    a2 = beta_i - a1;
	    con = dt * split;
	    b1 = con - dt;
	    b1 = con - b1;
	    b2 = dt - b1;

	    head_tmpy = beta_i * dt;
	    tail_tmpy = (((a1 * b1 - head_tmpy) + a1 * b2) + a2 * b1) + a2 * b2;
	  }
	}			/* tmpy = beta * y[iy] */
	{
	  /* Compute double-double = double-double + double-double. */
	  double	  bv;
	  double	  s1    , s2, t1, t2;

	  /* Add two hi words. */
	  s1 = head_tmpy + head_tmpx;
	  bv = s1 - head_tmpy;
	  s2 = ((head_tmpx - bv) + (head_tmpy - (s1 - bv)));

	  /* Add two lo words. */
	  t1 = tail_tmpy + tail_tmpx;
	  bv = t1 - tail_tmpy;
	  t2 = ((tail_tmpx - bv) + (tail_tmpy - (t1 - bv)));

	  s2 += t1;

	  /* Renormalize (s1, s2)  to  (t1, s2) */
	  t1 = s1 + s2;
	  s2 = s2 - (t1 - s1);

	  t2 += s2;

	  /* Renormalize (t1, t2)  */
	  head_tmpy = t1 + t2;
	  tail_tmpy = t2 - (head_tmpy - t1);
	}
	w_i[iw] = head_tmpy;
	ix += incx;
	iy += incy;
	iw += incw;
      }				/* endfor */

      FPU_FIX_STOP;

      break;
    }
  }
}
void		BLAS_dge_sum_mv_s_d(enum blas_order_type order, int m, int n,
		      		double	alpha , const float *a, int lda,
			  		const		double *x, int incx,
		      		double	beta  , const float *b, int ldb,
			  		double       *y, int incy)
/*
 * Purpose
 * =======
 *
 * Computes y = alpha * A * x + beta * B * y,
 *     where A, B are general matricies.
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of A; row or column major
 *
 * m            (input) int
 *              Row Dimension of A, B, length of output vector y
 *
 * n            (input) int
 *              Column Dimension of A, B and the length of vector x
 *
 * alpha        (input) double
 *
 * A            (input) const float*
 *
 * lda          (input) int
 *              Leading dimension of A
 *
 * x            (input) const double*
 *
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) double
 *
 * b            (input) const float*
 *
 * ldb          (input) int
 *              Leading dimension of B
 *
 * y            (input/output) double*
 *
 * incy         (input) int
 *              The stride for vector y.
 *
 */
{
  /* Routine name */
  static const char routine_name[] = "BLAS_dge_sum_mv_s_d";
  int		  i       , j;
  int		  xi      , yi;
  int		  x_starti, y_starti, incxi, incyi;
  int		  lda_min;
  int		  ai;
  int		  incai;
  int		  aij;
  int		  incaij;
  int		  bi;
  int		  incbi;
  int		  bij;
  int		  incbij;

  const float    *a_i = a;
  const float    *b_i = b;
  const double   *x_i = x;
  double         *y_i = y;
  double	  alpha_i = alpha;
  double	  beta_i = beta;
  float		  a_elem;
  float		  b_elem;
  double	  x_elem;
  double	  prod;
  double	  sumA;
  double	  sumB;
  double	  tmp1;
  double	  tmp2;



  /* m is number of rows */
  /* n is number of columns */

  if (m == 0 || n == 0)
    return;


  /* all error calls */
  if (order == blas_rowmajor) {
    lda_min = n;
    incai = lda;		/* row stride */
    incbi = ldb;
    incbij = incaij = 1;	/* column stride */
  } else if (order == blas_colmajor) {
    lda_min = m;
    incai = incbi = 1;		/* row stride */
    incaij = lda;		/* column stride */
    incbij = ldb;
  } else {
    /* error, order not blas_colmajor not blas_rowmajor */
    BLAS_error(routine_name, -1, order, 0);
    return;
  }

  if (m < 0)
    BLAS_error(routine_name, -2, m, 0);
  else if (n < 0)
    BLAS_error(routine_name, -3, n, 0);
  if (lda < lda_min)
    BLAS_error(routine_name, -6, lda, 0);
  else if (ldb < lda_min)
    BLAS_error(routine_name, -11, ldb, 0);
  else if (incx == 0)
    BLAS_error(routine_name, -8, incx, 0);
  else if (incy == 0)
    BLAS_error(routine_name, -13, incy, 0);

  incxi = incx;
  incyi = incy;







  if (incxi > 0)
    x_starti = 0;
  else
    x_starti = (1 - n) * incxi;

  if (incyi > 0)
    y_starti = 0;
  else
    y_starti = (1 - m) * incyi;



  if (alpha_i == 0.0) {
    if (beta_i == 0.0) {
      /* alpha, beta are 0.0 */
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	y_i[yi] = 0.0;
      }
    } else if (beta_i == 1.0) {
      /* alpha is 0.0, beta is 1.0 */


      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {

	sumB = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem = x_i[xi];

	  b_elem = b_i[bij];
	  prod = b_elem * x_elem;
	  sumB = sumB + prod;
	  bij += incbij;
	}
	/* now put the result into y_i */
	y_i[yi] = sumB;

	bi += incbi;
      }
    } else {
      /* alpha is 0.0, beta not 1.0 nor 0.0 */


      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {

	sumB = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem = x_i[xi];

	  b_elem = b_i[bij];
	  prod = b_elem * x_elem;
	  sumB = sumB + prod;
	  bij += incbij;
	}
	/* now put the result into y_i */
	tmp1 = sumB * beta_i;
	y_i[yi] = tmp1;

	bi += incbi;
      }
    }
  } else if (alpha_i == 1.0) {
    if (beta_i == 0.0) {
      /* alpha is 1.0, beta is 0.0 */

      ai = 0;

      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA = 0.0;
	aij = ai;

	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem = x_i[xi];
	  a_elem = a_i[aij];
	  prod = a_elem * x_elem;
	  sumA = sumA + prod;
	  aij += incaij;

	}
	/* now put the result into y_i */
	y_i[yi] = sumA;
	ai += incai;

      }
    } else if (beta_i == 1.0) {
      /* alpha is 1.0, beta is 1.0 */

      ai = 0;
      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA = 0.0;
	aij = ai;
	sumB = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem = x_i[xi];
	  a_elem = a_i[aij];
	  prod = a_elem * x_elem;
	  sumA = sumA + prod;
	  aij += incaij;
	  b_elem = b_i[bij];
	  prod = b_elem * x_elem;
	  sumB = sumB + prod;
	  bij += incbij;
	}
	/* now put the result into y_i */
	tmp1 = sumA;
	tmp2 = sumB;
	tmp1 = tmp1 + tmp2;
	y_i[yi] = tmp1;
	ai += incai;
	bi += incbi;
      }
    } else {
      /* alpha is 1.0, beta is other */

      ai = 0;
      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA = 0.0;
	aij = ai;
	sumB = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem = x_i[xi];
	  a_elem = a_i[aij];
	  prod = a_elem * x_elem;
	  sumA = sumA + prod;
	  aij += incaij;
	  b_elem = b_i[bij];
	  prod = b_elem * x_elem;
	  sumB = sumB + prod;
	  bij += incbij;
	}
	/* now put the result into y_i */
	tmp1 = sumA;
	tmp2 = sumB * beta_i;
	tmp1 = tmp1 + tmp2;
	y_i[yi] = tmp1;
	ai += incai;
	bi += incbi;
      }
    }
  } else {
    if (beta_i == 0.0) {
      /* alpha is other, beta is 0.0 */

      ai = 0;

      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA = 0.0;
	aij = ai;

	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem = x_i[xi];
	  a_elem = a_i[aij];
	  prod = a_elem * x_elem;
	  sumA = sumA + prod;
	  aij += incaij;

	}
	/* now put the result into y_i */
	tmp1 = sumA * alpha_i;
	y_i[yi] = tmp1;
	ai += incai;

      }
    } else if (beta_i == 1.0) {
      /* alpha is other, beta is 1.0 */

      ai = 0;
      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA = 0.0;
	aij = ai;
	sumB = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem = x_i[xi];
	  a_elem = a_i[aij];
	  prod = a_elem * x_elem;
	  sumA = sumA + prod;
	  aij += incaij;
	  b_elem = b_i[bij];
	  prod = b_elem * x_elem;
	  sumB = sumB + prod;
	  bij += incbij;
	}
	/* now put the result into y_i */
	tmp1 = sumA * alpha_i;
	tmp2 = sumB;
	tmp1 = tmp1 + tmp2;
	y_i[yi] = tmp1;
	ai += incai;
	bi += incbi;
      }
    } else {
      /* most general form, alpha, beta are other */

      ai = 0;
      bi = 0;
      for (i = 0, yi = y_starti; i < m; i++, yi += incyi) {
	sumA = 0.0;
	aij = ai;
	sumB = 0.0;
	bij = bi;
	for (j = 0, xi = x_starti; j < n; j++, xi += incxi) {
	  x_elem = x_i[xi];
	  a_elem = a_i[aij];
	  prod = a_elem * x_elem;
	  sumA = sumA + prod;
	  aij += incaij;
	  b_elem = b_i[bij];
	  prod = b_elem * x_elem;
	  sumB = sumB + prod;
	  bij += incbij;
	}
	/* now put the result into y_i */
	tmp1 = sumA * alpha_i;
	tmp2 = sumB * beta_i;
	tmp1 = tmp1 + tmp2;
	y_i[yi] = tmp1;
	ai += incai;
	bi += incbi;
      }
    }
  }


}				/* end BLAS_dge_sum_mv_s_d */
Exemplo n.º 27
0
void		BLAS_zhemv_z_c(enum blas_order_type order, enum blas_uplo_type uplo,
		     		int		n       , const void *alpha, const void *a, int lda,
	    		const		void  *x, int incx, const void *beta,
		     		void         *y, int incy)
/*
 * Purpose
 * =======
 *
 * This routines computes the matrix product:
 *
 *     y  <-  alpha * A * x  +  beta * y
 *
 * where A is a Hermitian matrix.
 *
 * Arguments
 * =========
 *
 * order   (input) enum blas_order_type
 *         Storage format of input hermitian matrix A.
 *
 * uplo    (input) enum blas_uplo_type
 *         Determines which half of matrix A (upper or lower triangle)
 *         is accessed.
 *
 * n       (input) int
 *         Dimension of A and size of vectors x, y.
 *
 * alpha   (input) const void*
 *
 * a       (input) void*
 *         Matrix A.
 *
 * lda     (input) int
 *         Leading dimension of matrix A.
 *
 * x       (input) void*
 *         Vector x.
 *
 * incx    (input) int
 *         Stride for vector x.
 *
 * beta    (input) const void*
 *
 * y       (input/output) void*
 *         Vector y.
 *
 * incy    (input) int
 *         Stride for vector y.
 *
 */
{
  /* Routine name */
  static const char routine_name[] = "BLAS_zhemv_z_c";

  /* Integer Index Variables */
  int		  i       , k;

  int		  xi      , yi;
  int		  aik     , astarti, x_starti, y_starti;

  int		  incai;
  int		  incaik  , incaik2;

  int		  n_i;

  /* Input Matrices */
  const double   *a_i = (double *)a;
  const float    *x_i = (float *)x;

  /* Output Vector */
  double         *y_i = (double *)y;

  /* Input Scalars */
  double         *alpha_i = (double *)alpha;
  double         *beta_i = (double *)beta;

  /* Temporary Floating-Point Variables */
  double	  a_elem [2];
  float		  x_elem [2];
  double	  y_elem [2];
  double	  prod   [2];
  double	  sum    [2];
  double	  tmp1   [2];
  double	  tmp2   [2];



  /* Test for no-op */
  if (n <= 0) {
    return;
  }
  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
    return;
  }
  /* Check for error conditions. */
  if (order != blas_colmajor && order != blas_rowmajor) {
    BLAS_error(routine_name, -1, order, NULL);
  }
  if (uplo != blas_upper && uplo != blas_lower) {
    BLAS_error(routine_name, -2, uplo, NULL);
  }
  if (lda < n) {
    BLAS_error(routine_name, -3, n, NULL);
  }
  if (incx == 0) {
    BLAS_error(routine_name, -8, incx, NULL);
  }
  if (incy == 0) {
    BLAS_error(routine_name, -11, incy, NULL);
  }
  /* Set Index Parameters */
  n_i = n;

  if ((order == blas_colmajor && uplo == blas_upper) ||
      (order == blas_rowmajor && uplo == blas_lower)) {
    incai = lda;
    incaik = 1;
    incaik2 = lda;
  } else {
    incai = 1;
    incaik = lda;
    incaik2 = 1;
  }
  /* Adjustment to increments (if any) */
  incx *= 2;
  incy *= 2;
  incai *= 2;
  incaik *= 2;
  incaik2 *= 2;
  if (incx < 0) {
    x_starti = (-n_i + 1) * incx;
  } else {
    x_starti = 0;
  }
  if (incy < 0) {
    y_starti = (-n_i + 1) * incy;
  } else {
    y_starti = 0;
  }



  /* alpha = 0.  In this case, just return beta * y */
  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
    for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
      y_elem[0] = y_i[yi];
      y_elem[1] = y_i[yi + 1];
      {
	tmp1[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1];
	tmp1[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0];
      }
      y_i[yi] = tmp1[0];
      y_i[yi + 1] = tmp1[1];
    }
  } else {
    /* determine whether we conjugate in first loop or second loop */

    if (uplo == blas_lower) {
      /* conjugate second */

      /* Case alpha == 1. */
      if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {

	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	  /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
	  for (i = 0, yi = y_starti, astarti = 0;
	       i < n_i; i++, yi += incy, astarti += incaik2) {
	    sum[0] = sum[1] = 0.0;
	    for (k = 0, aik = astarti, xi = x_starti;
		 k < i; k++, aik += incaik, xi += incx) {
	      a_elem[0] = a_i[aik];
	      a_elem[1] = a_i[aik + 1];

	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    a_elem[0] = a_i[aik];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0];
	      prod[1] = x_elem[1] * a_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    k++;
	    aik += incaik2;
	    xi += incx;
	    for (; k < n_i; k++, aik += incaik2, xi += incx) {
	      a_elem[0] = a_i[aik];
	      a_elem[1] = a_i[aik + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_i[yi] = sum[0];
	    y_i[yi + 1] = sum[1];
	  }
	} else {
	  /*
	   * Case alpha = 1, but beta != 0. We compute  y  <--- A * x + beta *
	   * y
	   */
	  for (i = 0, yi = y_starti, astarti = 0;
	       i < n_i; i++, yi += incy, astarti += incaik2) {
	    sum[0] = sum[1] = 0.0;

	    for (k = 0, aik = astarti, xi = x_starti;
		 k < i; k++, aik += incaik, xi += incx) {
	      a_elem[0] = a_i[aik];
	      a_elem[1] = a_i[aik + 1];

	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    a_elem[0] = a_i[aik];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0];
	      prod[1] = x_elem[1] * a_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    k++;
	    aik += incaik2;
	    xi += incx;
	    for (; k < n_i; k++, aik += incaik2, xi += incx) {
	      a_elem[0] = a_i[aik];
	      a_elem[1] = a_i[aik + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_elem[0] = y_i[yi];
	    y_elem[1] = y_i[yi + 1];
	    {
	      tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1];
	      tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0];
	    }
	    tmp1[0] = sum[0];
	    tmp1[1] = sum[1];
	    tmp1[0] = tmp2[0] + tmp1[0];
	    tmp1[1] = tmp2[1] + tmp1[1];
	    y_i[yi] = tmp1[0];
	    y_i[yi + 1] = tmp1[1];
	  }
	}
      } else {
	/* The most general form,   y <--- alpha * A * x + beta * y */
	for (i = 0, yi = y_starti, astarti = 0;
	     i < n_i; i++, yi += incy, astarti += incaik2) {
	  sum[0] = sum[1] = 0.0;

	  for (k = 0, aik = astarti, xi = x_starti;
	       k < i; k++, aik += incaik, xi += incx) {
	    a_elem[0] = a_i[aik];
	    a_elem[1] = a_i[aik + 1];

	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
	      prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  a_elem[0] = a_i[aik];
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  {
	    prod[0] = x_elem[0] * a_elem[0];
	    prod[1] = x_elem[1] * a_elem[0];
	  }
	  sum[0] = sum[0] + prod[0];
	  sum[1] = sum[1] + prod[1];
	  k++;
	  aik += incaik2;
	  xi += incx;
	  for (; k < n_i; k++, aik += incaik2, xi += incx) {
	    a_elem[0] = a_i[aik];
	    a_elem[1] = a_i[aik + 1];
	    a_elem[1] = -a_elem[1];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
	      prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  y_elem[0] = y_i[yi];
	  y_elem[1] = y_i[yi + 1];
	  {
	    tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1];
	    tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0];
	  }
	  {
	    tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1];
	    tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0];
	  }
	  tmp1[0] = tmp2[0] + tmp1[0];
	  tmp1[1] = tmp2[1] + tmp1[1];
	  y_i[yi] = tmp1[0];
	  y_i[yi + 1] = tmp1[1];
	}
      }
    } else {
      /* conjugate first loop */

      /* Case alpha == 1. */
      if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {

	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	  /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
	  for (i = 0, yi = y_starti, astarti = 0;
	       i < n_i; i++, yi += incy, astarti += incaik2) {
	    sum[0] = sum[1] = 0.0;
	    for (k = 0, aik = astarti, xi = x_starti;
		 k < i; k++, aik += incaik, xi += incx) {
	      a_elem[0] = a_i[aik];
	      a_elem[1] = a_i[aik + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    a_elem[0] = a_i[aik];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0];
	      prod[1] = x_elem[1] * a_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    k++;
	    aik += incaik2;
	    xi += incx;
	    for (; k < n_i; k++, aik += incaik2, xi += incx) {
	      a_elem[0] = a_i[aik];
	      a_elem[1] = a_i[aik + 1];

	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_i[yi] = sum[0];
	    y_i[yi + 1] = sum[1];
	  }
	} else {
	  /*
	   * Case alpha = 1, but beta != 0. We compute  y  <--- A * x + beta *
	   * y
	   */
	  for (i = 0, yi = y_starti, astarti = 0;
	       i < n_i; i++, yi += incy, astarti += incaik2) {
	    sum[0] = sum[1] = 0.0;

	    for (k = 0, aik = astarti, xi = x_starti;
		 k < i; k++, aik += incaik, xi += incx) {
	      a_elem[0] = a_i[aik];
	      a_elem[1] = a_i[aik + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    a_elem[0] = a_i[aik];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = x_elem[0] * a_elem[0];
	      prod[1] = x_elem[1] * a_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    k++;
	    aik += incaik2;
	    xi += incx;
	    for (; k < n_i; k++, aik += incaik2, xi += incx) {
	      a_elem[0] = a_i[aik];
	      a_elem[1] = a_i[aik + 1];

	      x_elem[0] = x_i[xi];
	      x_elem[1] = x_i[xi + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	    }
	    y_elem[0] = y_i[yi];
	    y_elem[1] = y_i[yi + 1];
	    {
	      tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1];
	      tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0];
	    }
	    tmp1[0] = sum[0];
	    tmp1[1] = sum[1];
	    tmp1[0] = tmp2[0] + tmp1[0];
	    tmp1[1] = tmp2[1] + tmp1[1];
	    y_i[yi] = tmp1[0];
	    y_i[yi + 1] = tmp1[1];
	  }
	}
      } else {
	/* The most general form,   y <--- alpha * A * x + beta * y */
	for (i = 0, yi = y_starti, astarti = 0;
	     i < n_i; i++, yi += incy, astarti += incaik2) {
	  sum[0] = sum[1] = 0.0;

	  for (k = 0, aik = astarti, xi = x_starti;
	       k < i; k++, aik += incaik, xi += incx) {
	    a_elem[0] = a_i[aik];
	    a_elem[1] = a_i[aik + 1];
	    a_elem[1] = -a_elem[1];
	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
	      prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  a_elem[0] = a_i[aik];
	  x_elem[0] = x_i[xi];
	  x_elem[1] = x_i[xi + 1];
	  {
	    prod[0] = x_elem[0] * a_elem[0];
	    prod[1] = x_elem[1] * a_elem[0];
	  }
	  sum[0] = sum[0] + prod[0];
	  sum[1] = sum[1] + prod[1];
	  k++;
	  aik += incaik2;
	  xi += incx;
	  for (; k < n_i; k++, aik += incaik2, xi += incx) {
	    a_elem[0] = a_i[aik];
	    a_elem[1] = a_i[aik + 1];

	    x_elem[0] = x_i[xi];
	    x_elem[1] = x_i[xi + 1];
	    {
	      prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
	      prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	  }
	  y_elem[0] = y_i[yi];
	  y_elem[1] = y_i[yi + 1];
	  {
	    tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1];
	    tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0];
	  }
	  {
	    tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1];
	    tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0];
	  }
	  tmp1[0] = tmp2[0] + tmp1[0];
	  tmp1[1] = tmp2[1] + tmp1[1];
	  y_i[yi] = tmp1[0];
	  y_i[yi + 1] = tmp1[1];
	}
      }
    }
  }


}				/* end BLAS_zhemv_z_c */
Exemplo n.º 28
0
void		BLAS_dgemm_s_d_x(enum blas_order_type order, enum blas_trans_type transa,
       		enum		blas_trans_type transb, int m, int n, int k,
		       		double	alpha , const float *a, int lda, const double *b, int ldb,
      		double	beta  , double *c, int ldc, enum blas_prec_type prec)
/*
 * Purpose
 * =======
 *
 * This routine computes the matrix product:
 *
 *      C   <-  alpha * op(A) * op(B)  +  beta * C .
 *
 * where op(M) represents either M, M transpose,
 * or M conjugate transpose.
 *
 * Arguments
 * =========
 *
 * order   (input) enum blas_order_type
 *         Storage format of input matrices A, B, and C.
 *
 * transa  (input) enum blas_trans_type
 *         Operation to be done on matrix A before multiplication.
 *         Can be no operation, transposition, or conjugate transposition.
 *
 * transb  (input) enum blas_trans_type
 *         Operation to be done on matrix B before multiplication.
 *         Can be no operation, transposition, or conjugate transposition.
 *
 * m n k   (input) int
 *         The dimensions of matrices A, B, and C.
 *         Matrix C is m-by-n matrix.
 *         Matrix A is m-by-k if A is not transposed,
 *                     k-by-m otherwise.
 *         Matrix B is k-by-n if B is not transposed,
 *                     n-by-k otherwise.
 *
 * alpha   (input) double
 *
 * a       (input) const float*
 *         matrix A.
 *
 * lda     (input) int
 *         leading dimension of A.
 *
 * b       (input) const double*
 *         matrix B
 *
 * ldb     (input) int
 *         leading dimension of B.
 *
 * beta    (input) double
 *
 * c       (input/output) double*
 *         matrix C
 *
 * ldc     (input) int
 *         leading dimension of C.
 *
 * prec   (input) enum blas_prec_type
 *        Specifies the internal precision to be used.
 *        = blas_prec_single: single precision.
 *        = blas_prec_double: double precision.
 *        = blas_prec_extra : anything at least 1.5 times as accurate
 *                            than double, and wider than 80-bits.
 *                            We use double-double in our implementation.
 *
 */
{
  static const char routine_name[] = "BLAS_dgemm_s_d_x";
  switch (prec) {

  case blas_prec_single:
  case blas_prec_double:
  case blas_prec_indigenous:{


      /* Integer Index Variables */
      int	      i    , j, h;

      int	      ai   , bj, ci;
      int	      aih  , bhj, cij;	/* Index into matrices a, b, c during
					 * multiply */

      int	      incai, incaih;	/* Index increments for matrix a */
      int	      incbj, incbhj;	/* Index increments for matrix b */
      int	      incci, inccij;	/* Index increments for matrix c */

      /* Input Matrices */
      const float    *a_i = a;
      const double   *b_i = b;

      /* Output Matrix */
      double         *c_i = c;

      /* Input Scalars */
      double	      alpha_i = alpha;
      double	      beta_i = beta;

      /* Temporary Floating-Point Variables */
      float	      a_elem;
      double	      b_elem;
      double	      c_elem;
      double	      prod;
      double	      sum;
      double	      tmp1;
      double	      tmp2;



      /* Test for error conditions */
      if (m < 0)
	BLAS_error(routine_name, -4, m, NULL);
      if (n < 0)
	BLAS_error(routine_name, -5, n, NULL);
      if (k < 0)
	BLAS_error(routine_name, -6, k, NULL);

      if (order == blas_colmajor) {

	if (ldc < m)
	  BLAS_error(routine_name, -14, ldc, NULL);

	if (transa == blas_no_trans) {
	  if (lda < m)
	    BLAS_error(routine_name, -9, lda, NULL);
	} else {
	  if (lda < k)
	    BLAS_error(routine_name, -9, lda, NULL);
	}

	if (transb == blas_no_trans) {
	  if (ldb < k)
	    BLAS_error(routine_name, -11, ldb, NULL);
	} else {
	  if (ldb < n)
	    BLAS_error(routine_name, -11, ldb, NULL);
	}

      } else {
	/* row major */
	if (ldc < n)
	  BLAS_error(routine_name, -14, ldc, NULL);

	if (transa == blas_no_trans) {
	  if (lda < k)
	    BLAS_error(routine_name, -9, lda, NULL);
	} else {
	  if (lda < m)
	    BLAS_error(routine_name, -9, lda, NULL);
	}

	if (transb == blas_no_trans) {
	  if (ldb < n)
	    BLAS_error(routine_name, -11, ldb, NULL);
	} else {
	  if (ldb < k)
	    BLAS_error(routine_name, -11, ldb, NULL);
	}
      }

      /* Test for no-op */
      if (n == 0 || m == 0 || k == 0)
	return;
      if (alpha_i == 0.0 && beta_i == 1.0) {
	return;
      }
      /* Set Index Parameters */
      if (order == blas_colmajor) {
	incci = 1;
	inccij = ldc;

	if (transa == blas_no_trans) {
	  incai = 1;
	  incaih = lda;
	} else {
	  incai = lda;
	  incaih = 1;
	}

	if (transb == blas_no_trans) {
	  incbj = ldb;
	  incbhj = 1;
	} else {
	  incbj = 1;
	  incbhj = ldb;
	}

      } else {
	/* row major */
	incci = ldc;
	inccij = 1;

	if (transa == blas_no_trans) {
	  incai = lda;
	  incaih = 1;
	} else {
	  incai = 1;
	  incaih = lda;
	}

	if (transb == blas_no_trans) {
	  incbj = 1;
	  incbhj = ldb;
	} else {
	  incbj = ldb;
	  incbhj = 1;
	}

      }



      /* Ajustment to increments */







      /* alpha = 0.  In this case, just return beta * C */
      if (alpha_i == 0.0) {

	ci = 0;
	for (i = 0; i < m; i++, ci += incci) {
	  cij = ci;
	  for (j = 0; j < n; j++, cij += inccij) {
	    c_elem = c_i[cij];
	    tmp1 = c_elem * beta_i;
	    c_i[cij] = tmp1;
	  }
	}

      } else if (alpha_i == 1.0) {

	/* Case alpha == 1. */

	if (beta_i == 0.0) {
	  /* Case alpha == 1, beta == 0.   We compute  C <--- A * B */

	  ci = 0;
	  ai = 0;
	  for (i = 0; i < m; i++, ci += incci, ai += incai) {

	    cij = ci;
	    bj = 0;

	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	      aih = ai;
	      bhj = bj;

	      sum = 0.0;

	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
		a_elem = a_i[aih];
		b_elem = b_i[bhj];
		if (transa == blas_conj_trans) {

		}
		if (transb == blas_conj_trans) {

		}
		prod = a_elem * b_elem;
		sum = sum + prod;
	      }
	      c_i[cij] = sum;
	    }
	  }

	} else {
	  /*
	   * Case alpha == 1, but beta != 0. We compute   C <--- A * B + beta *
	   * C
	   */

	  ci = 0;
	  ai = 0;
	  for (i = 0; i < m; i++, ci += incci, ai += incai) {

	    cij = ci;
	    bj = 0;

	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	      aih = ai;
	      bhj = bj;

	      sum = 0.0;

	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
		a_elem = a_i[aih];
		b_elem = b_i[bhj];
		if (transa == blas_conj_trans) {

		}
		if (transb == blas_conj_trans) {

		}
		prod = a_elem * b_elem;
		sum = sum + prod;
	      }

	      c_elem = c_i[cij];
	      tmp2 = c_elem * beta_i;
	      tmp1 = sum;
	      tmp1 = tmp2 + tmp1;
	      c_i[cij] = tmp1;
	    }
	  }
	}

      } else {

	/* The most general form,   C <-- alpha * A * B + beta * C  */
	ci = 0;
	ai = 0;
	for (i = 0; i < m; i++, ci += incci, ai += incai) {

	  cij = ci;
	  bj = 0;

	  for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	    aih = ai;
	    bhj = bj;

	    sum = 0.0;

	    for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
	      a_elem = a_i[aih];
	      b_elem = b_i[bhj];
	      if (transa == blas_conj_trans) {

	      }
	      if (transb == blas_conj_trans) {

	      }
	      prod = a_elem * b_elem;
	      sum = sum + prod;
	    }

	    tmp1 = sum * alpha_i;
	    c_elem = c_i[cij];
	    tmp2 = c_elem * beta_i;
	    tmp1 = tmp1 + tmp2;
	    c_i[cij] = tmp1;
	  }
	}

      }



      break;
    }

  case blas_prec_extra:{


      /* Integer Index Variables */
      int	      i    , j, h;

      int	      ai   , bj, ci;
      int	      aih  , bhj, cij;	/* Index into matrices a, b, c during
					 * multiply */

      int	      incai, incaih;	/* Index increments for matrix a */
      int	      incbj, incbhj;	/* Index increments for matrix b */
      int	      incci, inccij;	/* Index increments for matrix c */

      /* Input Matrices */
      const float    *a_i = a;
      const double   *b_i = b;

      /* Output Matrix */
      double         *c_i = c;

      /* Input Scalars */
      double	      alpha_i = alpha;
      double	      beta_i = beta;

      /* Temporary Floating-Point Variables */
      float	      a_elem;
      double	      b_elem;
      double	      c_elem;
      double	      head_prod, tail_prod;
      double	      head_sum, tail_sum;
      double	      head_tmp1, tail_tmp1;
      double	      head_tmp2, tail_tmp2;

      FPU_FIX_DECL;

      /* Test for error conditions */
      if (m < 0)
	BLAS_error(routine_name, -4, m, NULL);
      if (n < 0)
	BLAS_error(routine_name, -5, n, NULL);
      if (k < 0)
	BLAS_error(routine_name, -6, k, NULL);

      if (order == blas_colmajor) {

	if (ldc < m)
	  BLAS_error(routine_name, -14, ldc, NULL);

	if (transa == blas_no_trans) {
	  if (lda < m)
	    BLAS_error(routine_name, -9, lda, NULL);
	} else {
	  if (lda < k)
	    BLAS_error(routine_name, -9, lda, NULL);
	}

	if (transb == blas_no_trans) {
	  if (ldb < k)
	    BLAS_error(routine_name, -11, ldb, NULL);
	} else {
	  if (ldb < n)
	    BLAS_error(routine_name, -11, ldb, NULL);
	}

      } else {
	/* row major */
	if (ldc < n)
	  BLAS_error(routine_name, -14, ldc, NULL);

	if (transa == blas_no_trans) {
	  if (lda < k)
	    BLAS_error(routine_name, -9, lda, NULL);
	} else {
	  if (lda < m)
	    BLAS_error(routine_name, -9, lda, NULL);
	}

	if (transb == blas_no_trans) {
	  if (ldb < n)
	    BLAS_error(routine_name, -11, ldb, NULL);
	} else {
	  if (ldb < k)
	    BLAS_error(routine_name, -11, ldb, NULL);
	}
      }

      /* Test for no-op */
      if (n == 0 || m == 0 || k == 0)
	return;
      if (alpha_i == 0.0 && beta_i == 1.0) {
	return;
      }
      /* Set Index Parameters */
      if (order == blas_colmajor) {
	incci = 1;
	inccij = ldc;

	if (transa == blas_no_trans) {
	  incai = 1;
	  incaih = lda;
	} else {
	  incai = lda;
	  incaih = 1;
	}

	if (transb == blas_no_trans) {
	  incbj = ldb;
	  incbhj = 1;
	} else {
	  incbj = 1;
	  incbhj = ldb;
	}

      } else {
	/* row major */
	incci = ldc;
	inccij = 1;

	if (transa == blas_no_trans) {
	  incai = lda;
	  incaih = 1;
	} else {
	  incai = 1;
	  incaih = lda;
	}

	if (transb == blas_no_trans) {
	  incbj = 1;
	  incbhj = ldb;
	} else {
	  incbj = ldb;
	  incbhj = 1;
	}

      }

      FPU_FIX_START;

      /* Ajustment to increments */







      /* alpha = 0.  In this case, just return beta * C */
      if (alpha_i == 0.0) {

	ci = 0;
	for (i = 0; i < m; i++, ci += incci) {
	  cij = ci;
	  for (j = 0; j < n; j++, cij += inccij) {
	    c_elem = c_i[cij];
	    {
	      /* Compute double_double = double * double. */
	      double	      a1, a2, b1, b2, con;

	      con = c_elem * split;
	      a1 = con - c_elem;
	      a1 = con - a1;
	      a2 = c_elem - a1;
	      con = beta_i * split;
	      b1 = con - beta_i;
	      b1 = con - b1;
	      b2 = beta_i - b1;

	      head_tmp1 = c_elem * beta_i;
	      tail_tmp1 = (((a1 * b1 - head_tmp1) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    c_i[cij] = head_tmp1;
	  }
	}

      } else if (alpha_i == 1.0) {

	/* Case alpha == 1. */

	if (beta_i == 0.0) {
	  /* Case alpha == 1, beta == 0.   We compute  C <--- A * B */

	  ci = 0;
	  ai = 0;
	  for (i = 0; i < m; i++, ci += incci, ai += incai) {

	    cij = ci;
	    bj = 0;

	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	      aih = ai;
	      bhj = bj;

	      head_sum = tail_sum = 0.0;

	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
		a_elem = a_i[aih];
		b_elem = b_i[bhj];
		if (transa == blas_conj_trans) {

		}
		if (transb == blas_conj_trans) {

		} {
		  double	  dt = (double)a_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = dt * split;
		    a1 = con - dt;
		    a1 = con - a1;
		    a2 = dt - a1;
		    con = b_elem * split;
		    b1 = con - b_elem;
		    b1 = con - b1;
		    b2 = b_elem - b1;

		    head_prod = dt * b_elem;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum + head_prod;
		  bv = s1 - head_sum;
		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum + tail_prod;
		  bv = t1 - tail_sum;
		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum = t1 + t2;
		  tail_sum = t2 - (head_sum - t1);
		}
	      }
	      c_i[cij] = head_sum;
	    }
	  }

	} else {
	  /*
	   * Case alpha == 1, but beta != 0. We compute   C <--- A * B + beta *
	   * C
	   */

	  ci = 0;
	  ai = 0;
	  for (i = 0; i < m; i++, ci += incci, ai += incai) {

	    cij = ci;
	    bj = 0;

	    for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	      aih = ai;
	      bhj = bj;

	      head_sum = tail_sum = 0.0;

	      for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
		a_elem = a_i[aih];
		b_elem = b_i[bhj];
		if (transa == blas_conj_trans) {

		}
		if (transb == blas_conj_trans) {

		} {
		  double	  dt = (double)a_elem;
		  {
		    /* Compute double_double = double * double. */
		    double	    a1  , a2, b1, b2, con;

		    con = dt * split;
		    a1 = con - dt;
		    a1 = con - a1;
		    a2 = dt - a1;
		    con = b_elem * split;
		    b1 = con - b_elem;
		    b1 = con - b1;
		    b2 = b_elem - b1;

		    head_prod = dt * b_elem;
		    tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		  }
		}
		{
		  /* Compute double-double = double-double + double-double. */
		  double	  bv;
		  double	  s1    , s2, t1, t2;

		  /* Add two hi words. */
		  s1 = head_sum + head_prod;
		  bv = s1 - head_sum;
		  s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		  /* Add two lo words. */
		  t1 = tail_sum + tail_prod;
		  bv = t1 - tail_sum;
		  t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		  s2 += t1;

		  /* Renormalize (s1, s2)  to  (t1, s2) */
		  t1 = s1 + s2;
		  s2 = s2 - (t1 - s1);

		  t2 += s2;

		  /* Renormalize (t1, t2)  */
		  head_sum = t1 + t2;
		  tail_sum = t2 - (head_sum - t1);
		}
	      }

	      c_elem = c_i[cij];
	      {
		/* Compute double_double = double * double. */
		double		a1     , a2, b1, b2, con;

		con = c_elem * split;
		a1 = con - c_elem;
		a1 = con - a1;
		a2 = c_elem - a1;
		con = beta_i * split;
		b1 = con - beta_i;
		b1 = con - b1;
		b2 = beta_i - b1;

		head_tmp2 = c_elem * beta_i;
		tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
	      }
	      head_tmp1 = head_sum;
	      tail_tmp1 = tail_sum;
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_tmp2 + head_tmp1;
		bv = s1 - head_tmp2;
		s2 = ((head_tmp1 - bv) + (head_tmp2 - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_tmp2 + tail_tmp1;
		bv = t1 - tail_tmp2;
		t2 = ((tail_tmp1 - bv) + (tail_tmp2 - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_tmp1 = t1 + t2;
		tail_tmp1 = t2 - (head_tmp1 - t1);
	      }
	      c_i[cij] = head_tmp1;
	    }
	  }
	}

      } else {

	/* The most general form,   C <-- alpha * A * B + beta * C  */
	ci = 0;
	ai = 0;
	for (i = 0; i < m; i++, ci += incci, ai += incai) {

	  cij = ci;
	  bj = 0;

	  for (j = 0; j < n; j++, cij += inccij, bj += incbj) {

	    aih = ai;
	    bhj = bj;

	    head_sum = tail_sum = 0.0;

	    for (h = 0; h < k; h++, aih += incaih, bhj += incbhj) {
	      a_elem = a_i[aih];
	      b_elem = b_i[bhj];
	      if (transa == blas_conj_trans) {

	      }
	      if (transb == blas_conj_trans) {

	      } {
		double		dt = (double)a_elem;
		{
		  /* Compute double_double = double * double. */
		  double	  a1    , a2, b1, b2, con;

		  con = dt * split;
		  a1 = con - dt;
		  a1 = con - a1;
		  a2 = dt - a1;
		  con = b_elem * split;
		  b1 = con - b_elem;
		  b1 = con - b1;
		  b2 = b_elem - b1;

		  head_prod = dt * b_elem;
		  tail_prod = (((a1 * b1 - head_prod) + a1 * b2) + a2 * b1) + a2 * b2;
		}
	      }
	      {
		/* Compute double-double = double-double + double-double. */
		double		bv;
		double		s1     , s2, t1, t2;

		/* Add two hi words. */
		s1 = head_sum + head_prod;
		bv = s1 - head_sum;
		s2 = ((head_prod - bv) + (head_sum - (s1 - bv)));

		/* Add two lo words. */
		t1 = tail_sum + tail_prod;
		bv = t1 - tail_sum;
		t2 = ((tail_prod - bv) + (tail_sum - (t1 - bv)));

		s2 += t1;

		/* Renormalize (s1, s2)  to  (t1, s2) */
		t1 = s1 + s2;
		s2 = s2 - (t1 - s1);

		t2 += s2;

		/* Renormalize (t1, t2)  */
		head_sum = t1 + t2;
		tail_sum = t2 - (head_sum - t1);
	      }
	    }

	    {
	      /* Compute double-double = double-double * double. */
	      double	      a11, a21, b1, b2, c11, c21, c2, con, t1, t2;

	      con = head_sum * split;
	      a11 = con - head_sum;
	      a11 = con - a11;
	      a21 = head_sum - a11;
	      con = alpha_i * split;
	      b1 = con - alpha_i;
	      b1 = con - b1;
	      b2 = alpha_i - b1;

	      c11 = head_sum * alpha_i;
	      c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;

	      c2 = tail_sum * alpha_i;
	      t1 = c11 + c2;
	      t2 = (c2 - (t1 - c11)) + c21;

	      head_tmp1 = t1 + t2;
	      tail_tmp1 = t2 - (head_tmp1 - t1);
	    }
	    c_elem = c_i[cij];
	    {
	      /* Compute double_double = double * double. */
	      double	      a1, a2, b1, b2, con;

	      con = c_elem * split;
	      a1 = con - c_elem;
	      a1 = con - a1;
	      a2 = c_elem - a1;
	      con = beta_i * split;
	      b1 = con - beta_i;
	      b1 = con - b1;
	      b2 = beta_i - b1;

	      head_tmp2 = c_elem * beta_i;
	      tail_tmp2 = (((a1 * b1 - head_tmp2) + a1 * b2) + a2 * b1) + a2 * b2;
	    }
	    {
	      /* Compute double-double = double-double + double-double. */
	      double	      bv;
	      double	      s1, s2, t1, t2;

	      /* Add two hi words. */
	      s1 = head_tmp1 + head_tmp2;
	      bv = s1 - head_tmp1;
	      s2 = ((head_tmp2 - bv) + (head_tmp1 - (s1 - bv)));

	      /* Add two lo words. */
	      t1 = tail_tmp1 + tail_tmp2;
	      bv = t1 - tail_tmp1;
	      t2 = ((tail_tmp2 - bv) + (tail_tmp1 - (t1 - bv)));

	      s2 += t1;

	      /* Renormalize (s1, s2)  to  (t1, s2) */
	      t1 = s1 + s2;
	      s2 = s2 - (t1 - s1);

	      t2 += s2;

	      /* Renormalize (t1, t2)  */
	      head_tmp1 = t1 + t2;
	      tail_tmp1 = t2 - (head_tmp1 - t1);
	    }
	    c_i[cij] = head_tmp1;
	  }
	}

      }

      FPU_FIX_STOP;

      break;
    }
  }
}
Exemplo n.º 29
0
void BLAS_ctrmv_s(enum blas_order_type order, enum blas_uplo_type uplo,
		  enum blas_trans_type trans, enum blas_diag_type diag, int n,
		  const void *alpha, const float *T, int ldt,
		  void *x, int incx)

/*
 * Purpose
 * =======
 *
 * Computes x <-- alpha * T * x, where T is a triangular matrix.
 *
 * Arguments
 * =========
 * 
 * order  (input) enum blas_order_type
 *        column major, row major
 *
 * uplo   (input) enum blas_uplo_type
 *        upper, lower
 *
 * trans  (input) enum blas_trans_type
 *        no trans, trans, conj trans
 * 
 * diag   (input) enum blas_diag_type
 *        unit, non unit
 *
 * n      (input) int
 *        the dimension of T
 * 
 * alpha  (input) const void*
 * 
 * T      (input) float*
 *        Triangular matrix
 *
 * ldt    (input) int 
 *        Leading dimension of T
 *
 * x      (input) const void*
 *    Array of length n.
 * 
 * incx   (input) int
 *     The stride used to access components x[i].
 *
 */
{
  static const char routine_name[] = "BLAS_ctrmv_s";

  int i, j;			/* used to idx matrix */
  int xj, xj0;
  int ti, tij, tij0;

  int inc_ti, inc_tij;
  int inc_x;

  const float *T_i = T;		/* internal matrix T */
  float *x_i = (float *) x;	/* internal x */
  float *alpha_i = (float *) alpha;	/* internal alpha */

  float t_elem;
  float x_elem[2];
  float prod[2];
  float sum[2];
  float tmp[2];



  /* all error calls */
  if ((order != blas_rowmajor && order != blas_colmajor) ||
      (uplo != blas_upper && uplo != blas_lower) ||
      (trans != blas_trans &&
       trans != blas_no_trans &&
       trans != blas_conj_trans) ||
      (diag != blas_non_unit_diag && diag != blas_unit_diag) ||
      (ldt < n) || (incx == 0)) {
    BLAS_error(routine_name, 0, 0, NULL);
  } else if (n <= 0) {
    BLAS_error(routine_name, -4, n, NULL);
  } else if (incx == 0) {
    BLAS_error(routine_name, -9, incx, NULL);
  }

  if (trans == blas_no_trans) {
    if (uplo == blas_upper) {
      inc_x = -incx;
      if (order == blas_rowmajor) {
	inc_ti = ldt;
	inc_tij = -1;
      } else {
	inc_ti = 1;
	inc_tij = -ldt;
      }
    } else {
      inc_x = incx;
      if (order == blas_rowmajor) {
	inc_ti = -ldt;
	inc_tij = 1;
      } else {
	inc_ti = -1;
	inc_tij = ldt;
      }
    }
  } else {
    if (uplo == blas_upper) {
      inc_x = incx;
      if (order == blas_rowmajor) {
	inc_ti = -1;
	inc_tij = ldt;
      } else {
	inc_ti = -ldt;
	inc_tij = 1;
      }
    } else {
      inc_x = -incx;
      if (order == blas_rowmajor) {
	inc_ti = 1;
	inc_tij = -ldt;
      } else {
	inc_ti = ldt;
	inc_tij = -1;
      }
    }
  }



  inc_x *= 2;

  xj0 = (inc_x > 0 ? 0 : -(n - 1) * inc_x);
  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
    xj = xj0;
    for (j = 0; j < n; j++) {
      x_i[xj] = 0.0;
      x_i[xj + 1] = 0.0;
      xj += inc_x;
    }
  } else {

    if (diag == blas_unit_diag) {


      ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
      tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
      for (i = 0; i < n; i++) {

	sum[0] = sum[1] = 0.0;

	xj = xj0;
	tij = ti + tij0;
	for (j = i; j < (n - 1); j++) {

	  t_elem = T_i[tij];

	  x_elem[0] = x_i[xj];
	  x_elem[1] = x_i[xj + 1];
	  {
	    prod[0] = x_elem[0] * t_elem;
	    prod[1] = x_elem[1] * t_elem;
	  }
	  sum[0] = sum[0] + prod[0];
	  sum[1] = sum[1] + prod[1];

	  xj += inc_x;
	  tij += inc_tij;
	}

	x_elem[0] = x_i[xj];
	x_elem[1] = x_i[xj + 1];
	sum[0] = sum[0] + x_elem[0];
	sum[1] = sum[1] + x_elem[1];

	if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	  x_i[xj] = sum[0];
	  x_i[xj + 1] = sum[1];
	} else {
	  {
	    tmp[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
	    tmp[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
	  }

	  x_i[xj] = tmp[0];
	  x_i[xj + 1] = tmp[1];
	}

	ti += inc_ti;
      }

    } else {


      ti = (inc_ti > 0 ? 0 : -(n - 1) * inc_ti);
      tij0 = (inc_tij > 0 ? 0 : -(n - 1) * inc_tij);
      for (i = 0; i < n; i++) {

	sum[0] = sum[1] = 0.0;

	xj = xj0;
	tij = ti + tij0;
	for (j = i; j < n; j++) {

	  t_elem = T_i[tij];

	  x_elem[0] = x_i[xj];
	  x_elem[1] = x_i[xj + 1];
	  {
	    prod[0] = x_elem[0] * t_elem;
	    prod[1] = x_elem[1] * t_elem;
	  }
	  sum[0] = sum[0] + prod[0];
	  sum[1] = sum[1] + prod[1];

	  xj += inc_x;
	  tij += inc_tij;
	}

	if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	  x_i[xj - inc_x] = sum[0];
	  x_i[xj - inc_x + 1] = sum[1];
	} else {
	  {
	    tmp[0] = sum[0] * alpha_i[0] - sum[1] * alpha_i[1];
	    tmp[1] = sum[0] * alpha_i[1] + sum[1] * alpha_i[0];
	  }

	  x_i[xj - inc_x] = tmp[0];
	  x_i[xj - inc_x + 1] = tmp[1];
	}

	ti += inc_ti;
      }

    }

  }



}
Exemplo n.º 30
0
void		BLAS_zgemv_c_c(enum blas_order_type order, enum blas_trans_type trans,
		     		int		m       , int n, const void *alpha, const void *a, int lda,
		     		const		void  *x, int incx, const void *beta, void *y,
		     		int		incy)
/*
 * Purpose
 * =======
 *
 * Computes y = alpha * A * x + beta * y, where A is a general matrix.
 *
 * Arguments
 * =========
 *
 * order        (input) blas_order_type
 *              Order of AP; row or column major
 *
 * trans        (input) blas_trans_type
 *              Transpose of AB; no trans,
 *              trans, or conjugate trans
 *
 * m            (input) int
 *              Dimension of AB
 *
 * n            (input) int
 *              Dimension of AB and the length of vector x
 *
 * alpha        (input) const void*
 *
 * A            (input) const void*
 *
 * lda          (input) int
 *              Leading dimension of A
 *
 * x            (input) const void*
 *
 * incx         (input) int
 *              The stride for vector x.
 *
 * beta         (input) const void*
 *
 * y            (input/output) void*
 *
 * incy         (input) int
 *              The stride for vector y.
 *
 */
{
  static const char routine_name[] = "BLAS_zgemv_c_c";

  int		  i       , j;
  int		  iy      , jx, kx, ky;
  int		  lenx    , leny;
  int		  ai      , aij;
  int		  incai   , incaij;

  const float    *a_i = (float *)a;
  const float    *x_i = (float *)x;
  double         *y_i = (double *)y;
  double         *alpha_i = (double *)alpha;
  double         *beta_i = (double *)beta;
  float		  a_elem [2];
  float		  x_elem [2];
  double	  y_elem [2];
  double	  prod   [2];
  double	  sum    [2];
  double	  tmp1   [2];
  double	  tmp2   [2];


  /* all error calls */
  if (m < 0)
    BLAS_error(routine_name, -3, m, 0);
  else if (n <= 0)
    BLAS_error(routine_name, -4, n, 0);
  else if (incx == 0)
    BLAS_error(routine_name, -9, incx, 0);
  else if (incy == 0)
    BLAS_error(routine_name, -12, incy, 0);

  if ((order == blas_rowmajor) && (trans == blas_no_trans)) {
    lenx = n;
    leny = m;
    incai = lda;
    incaij = 1;
  } else if ((order == blas_rowmajor) && (trans != blas_no_trans)) {
    lenx = m;
    leny = n;
    incai = 1;
    incaij = lda;
  } else if ((order == blas_colmajor) && (trans == blas_no_trans)) {
    lenx = n;
    leny = m;
    incai = 1;
    incaij = lda;
  } else {			/* colmajor and blas_trans */
    lenx = m;
    leny = n;
    incai = lda;
    incaij = 1;
  }
  if ((order == blas_colmajor && lda < m) ||
      (order == blas_rowmajor && lda < n))
    BLAS_error(routine_name, -7, lda, NULL);



  incx *= 2;
  incy *= 2;
  incai *= 2;
  incaij *= 2;

  if (incx > 0)
    kx = 0;
  else
    kx = (1 - lenx) * incx;
  if (incy > 0)
    ky = 0;
  else
    ky = (1 - leny) * incy;

  /* No extra-precision needed for alpha = 0 */
  if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
    if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
      iy = ky;
      for (i = 0; i < leny; i++) {
	y_i[iy] = 0.0;
	y_i[iy + 1] = 0.0;
	iy += incy;
      }
    } else if (!(beta_i[0] == 0.0 && beta_i[1] == 0.0)) {
      iy = ky;
      for (i = 0; i < leny; i++) {
	y_elem[0] = y_i[iy];
	y_elem[1] = y_i[iy + 1];
	{
	  tmp1[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1];
	  tmp1[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0];
	}
	y_i[iy] = tmp1[0];
	y_i[iy + 1] = tmp1[1];
	iy += incy;
      }
    }
  } else {
    if (trans == blas_conj_trans) {

      /* if beta = 0, we can save m multiplies: y = alpha*A*x */
      if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	/* save m more multiplies if alpha = 1 */
	if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	  ai = 0;
	  iy = ky;
	  for (i = 0; i < leny; i++) {
	    sum[0] = sum[1] = 0.0;
	    aij = ai;
	    jx = kx;
	    for (j = 0; j < lenx; j++) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[jx];
	      x_elem[1] = x_i[jx + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	      aij += incaij;
	      jx += incx;
	    }
	    y_i[iy] = sum[0];
	    y_i[iy + 1] = sum[1];
	    ai += incai;
	    iy += incy;
	  }
	} else {
	  ai = 0;
	  iy = ky;
	  for (i = 0; i < leny; i++) {
	    sum[0] = sum[1] = 0.0;
	    aij = ai;
	    jx = kx;
	    for (j = 0; j < lenx; j++) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];
	      a_elem[1] = -a_elem[1];
	      x_elem[0] = x_i[jx];
	      x_elem[1] = x_i[jx + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	      aij += incaij;
	      jx += incx;
	    }
	    {
	      tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1];
	      tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0];
	    }
	    y_i[iy] = tmp1[0];
	    y_i[iy + 1] = tmp1[1];
	    ai += incai;
	    iy += incy;
	  }
	}
      } else {
	/* the most general form, y = alpha*A*x + beta*y */
	ai = 0;
	iy = ky;
	for (i = 0; i < leny; i++) {
	  sum[0] = sum[1] = 0.0;;
	  aij = ai;
	  jx = kx;
	  for (j = 0; j < lenx; j++) {
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];
	    a_elem[1] = -a_elem[1];
	    x_elem[0] = x_i[jx];
	    x_elem[1] = x_i[jx + 1];
	    {
	      prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
	      prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }
	  {
	    tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1];
	    tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0];
	  }
	  y_elem[0] = y_i[iy];
	  y_elem[1] = y_i[iy + 1];
	  {
	    tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1];
	    tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0];
	  }
	  tmp1[0] = tmp1[0] + tmp2[0];
	  tmp1[1] = tmp1[1] + tmp2[1];
	  y_i[iy] = tmp1[0];
	  y_i[iy + 1] = tmp1[1];
	  ai += incai;
	  iy += incy;
	}
      }

    } else {

      /* if beta = 0, we can save m multiplies: y = alpha*A*x */
      if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
	/* save m more multiplies if alpha = 1 */
	if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
	  ai = 0;
	  iy = ky;
	  for (i = 0; i < leny; i++) {
	    sum[0] = sum[1] = 0.0;
	    aij = ai;
	    jx = kx;
	    for (j = 0; j < lenx; j++) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];

	      x_elem[0] = x_i[jx];
	      x_elem[1] = x_i[jx + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	      aij += incaij;
	      jx += incx;
	    }
	    y_i[iy] = sum[0];
	    y_i[iy + 1] = sum[1];
	    ai += incai;
	    iy += incy;
	  }
	} else {
	  ai = 0;
	  iy = ky;
	  for (i = 0; i < leny; i++) {
	    sum[0] = sum[1] = 0.0;
	    aij = ai;
	    jx = kx;
	    for (j = 0; j < lenx; j++) {
	      a_elem[0] = a_i[aij];
	      a_elem[1] = a_i[aij + 1];

	      x_elem[0] = x_i[jx];
	      x_elem[1] = x_i[jx + 1];
	      {
		prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
		prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	      }
	      sum[0] = sum[0] + prod[0];
	      sum[1] = sum[1] + prod[1];
	      aij += incaij;
	      jx += incx;
	    }
	    {
	      tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1];
	      tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0];
	    }
	    y_i[iy] = tmp1[0];
	    y_i[iy + 1] = tmp1[1];
	    ai += incai;
	    iy += incy;
	  }
	}
      } else {
	/* the most general form, y = alpha*A*x + beta*y */
	ai = 0;
	iy = ky;
	for (i = 0; i < leny; i++) {
	  sum[0] = sum[1] = 0.0;;
	  aij = ai;
	  jx = kx;
	  for (j = 0; j < lenx; j++) {
	    a_elem[0] = a_i[aij];
	    a_elem[1] = a_i[aij + 1];

	    x_elem[0] = x_i[jx];
	    x_elem[1] = x_i[jx + 1];
	    {
	      prod[0] = (double)a_elem[0] * x_elem[0] - (double)a_elem[1] * x_elem[1];
	      prod[1] = (double)a_elem[0] * x_elem[1] + (double)a_elem[1] * x_elem[0];
	    }
	    sum[0] = sum[0] + prod[0];
	    sum[1] = sum[1] + prod[1];
	    aij += incaij;
	    jx += incx;
	  }
	  {
	    tmp1[0] = (double)sum[0] * alpha_i[0] - (double)sum[1] * alpha_i[1];
	    tmp1[1] = (double)sum[0] * alpha_i[1] + (double)sum[1] * alpha_i[0];
	  }
	  y_elem[0] = y_i[iy];
	  y_elem[1] = y_i[iy + 1];
	  {
	    tmp2[0] = (double)y_elem[0] * beta_i[0] - (double)y_elem[1] * beta_i[1];
	    tmp2[1] = (double)y_elem[0] * beta_i[1] + (double)y_elem[1] * beta_i[0];
	  }
	  tmp1[0] = tmp1[0] + tmp2[0];
	  tmp1[1] = tmp1[1] + tmp2[1];
	  y_i[iy] = tmp1[0];
	  y_i[iy + 1] = tmp1[1];
	  ai += incai;
	  iy += incy;
	}
      }

    }
  }



}