Exemple #1
0
/*
 * Blocked, in-place update of the complex vector b using the triangular
 * data stored in a.  All arrays hold interleaved (real, imaginary) FLOAT
 * pairs, hence the factor 2 in every index expression.
 * NOTE(review): this looks like the level-2 triangular matrix-vector
 * product driver; confirm against the build rules that define CNAME,
 * TRANSA and UNIT.
 *
 * Build-time variants:
 *   TRANSA 1 / 3  before each diagonal block, GEMV_N / GEMV_R updates the
 *                 entries of the vector that precede the block; inside the
 *                 block AXPYU_K / AXPYC_K is applied column by column.
 *   TRANSA 2 / 4  inside the block DOTU_K / DOTC_K is added row by row;
 *                 after the block, GEMV_T / GEMV_C adds the contribution of
 *                 the rows below it.
 *   TRANSA 1 / 2  multiply by the diagonal entry as stored,
 *   TRANSA 3 / 4  multiply by its complex conjugate.
 *   UNIT          when defined, the diagonal entries are never read.
 *
 * m        number of complex elements processed in b
 * a, lda   matrix storage and its leading dimension (in complex elements)
 * b, incb  vector and its stride; when incb != 1 the work is carried out on
 *          a unit-stride copy at the start of buffer and copied back at the
 *          end (both through COPY_K)
 * buffer   scratch space; the GEMV scratch area is buffer itself, or the
 *          first 4096-byte-aligned address at or past the end of the
 *          unit-stride copy when incb != 1
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *buffer) {

    BLASLONG blk, blk_len, k;
#if (TRANSA == 2) || (TRANSA == 4)
    OPENBLAS_COMPLEX_FLOAT dot;
#endif
#ifndef UNIT
    FLOAT diag_re, diag_im, vec_re, vec_im;
#endif
    FLOAT *work = b;
    FLOAT *gemv_scratch = (FLOAT *)buffer;
    FLOAT *seg, *col, *cur;

    if (incb != 1) {
        /* Work on a unit-stride copy; GEMV scratch follows it, page aligned. */
        work = buffer;
        gemv_scratch = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
        COPY_K(m, b, incb, buffer, 1);
    }

    blk = 0;
    while (blk < m) {

        blk_len = MIN(m - blk, DTB_ENTRIES);

        /* First vector element that belongs to the current diagonal block. */
        seg = work + blk * 2;

#if TRANSA == 1
        if (blk > 0) {
            GEMV_N(blk, blk_len, 0, dp1, ZERO,
                   a + blk * lda * 2, lda,
                   seg, 1,
                   work, 1, gemv_scratch);
        }
#elif TRANSA == 3
        if (blk > 0) {
            GEMV_R(blk, blk_len, 0, dp1, ZERO,
                   a + blk * lda * 2, lda,
                   seg, 1,
                   work, 1, gemv_scratch);
        }
#endif

        for (k = 0; k < blk_len; k++) {
            /* Column (blk + k) of a, starting at row blk. */
            col = a + (blk + (k + blk) * lda) * 2;
            /* Vector element paired with that column's diagonal entry. */
            cur = seg + k * 2;

#if TRANSA == 1
            /* Uses the not-yet-scaled value of cur, so this must come first. */
            if (k > 0) {
                AXPYU_K(k, 0, 0, cur[0], cur[1],
                        col, 1, seg, 1, NULL, 0);
            }
#elif TRANSA == 3
            /* Uses the not-yet-scaled value of cur, so this must come first. */
            if (k > 0) {
                AXPYC_K(k, 0, 0, cur[0], cur[1],
                        col, 1, seg, 1, NULL, 0);
            }
#endif

#ifndef UNIT
            diag_re = col[k * 2 + 0];
            diag_im = col[k * 2 + 1];

            vec_re = cur[0];
            vec_im = cur[1];

#if (TRANSA == 1) || (TRANSA == 2)
            /* cur = diag * cur */
            cur[0] = diag_re * vec_re - diag_im * vec_im;
            cur[1] = diag_re * vec_im + diag_im * vec_re;
#else
            /* cur = conj(diag) * cur */
            cur[0] = diag_re * vec_re + diag_im * vec_im;
            cur[1] = diag_re * vec_im - diag_im * vec_re;
#endif
#endif

#if (TRANSA == 2) || (TRANSA == 4)
            if (k + 1 < blk_len) {
                /* Entries below the diagonal in this column, still inside the block. */
#if TRANSA == 2
                dot = DOTU_K(blk_len - k - 1,
                             col + (k + 1) * 2, 1,
                             cur + 2, 1);
#else
                dot = DOTC_K(blk_len - k - 1,
                             col + (k + 1) * 2, 1,
                             cur + 2, 1);
#endif

                cur[0] += CREAL(dot);
                cur[1] += CIMAG(dot);
            }
#endif

        }

#if TRANSA == 2
        if (m - blk > blk_len) {
            GEMV_T(m - blk - blk_len, blk_len, 0, dp1, ZERO,
                   a + (blk + blk_len + blk * lda) * 2, lda,
                   work + (blk + blk_len) * 2, 1,
                   seg, 1, gemv_scratch);
        }
#elif TRANSA == 4
        if (m - blk > blk_len) {
            GEMV_C(m - blk - blk_len, blk_len, 0, dp1, ZERO,
                   a + (blk + blk_len + blk * lda) * 2, lda,
                   work + (blk + blk_len) * 2, 1,
                   seg, 1, gemv_scratch);
        }
#endif

        blk += DTB_ENTRIES;
    }

    if (incb != 1) {
        /* Scatter the unit-stride result back into the caller's vector. */
        COPY_K(m, buffer, 1, b, incb);
    }

    return 0;
}
Exemple #2
0
/*
 * Blocked, in-place backward sweep over the complex vector b using the
 * triangular data stored in a.  Blocks of at most DTB_ENTRIES rows are
 * visited from the end of the vector towards its start, and rows inside a
 * block are visited in descending order.
 * NOTE(review): this looks like the level-2 triangular solve driver;
 * confirm against the build rules that define CNAME, TRANSA and UNIT.
 *
 * Build-time variants:
 *   TRANSA 2 / 4  before each block, GEMV_T / GEMV_C (scaled by dm1) folds
 *                 in the entries at index >= top; inside the block a
 *                 DOTU_K / DOTC_K result is subtracted from the current entry.
 *   TRANSA 1 / 3  inside the block AXPYU_K / AXPYC_K applies the negated
 *                 current entry to the earlier entries of the block; after
 *                 the block, GEMV_N / GEMV_R (scaled by dm1) updates the
 *                 entries that precede it.
 *   TRANSA < 3    multiply by the reciprocal of the diagonal entry,
 *   TRANSA >= 3   multiply by the reciprocal of its complex conjugate.
 *   UNIT          when defined, the diagonal entries are never read.
 * dm1 is defined outside this block (presumably -1.0; verify).
 *
 * NOTE(review): the dot-product result is declared as FLOAT _Complex here;
 * check that this matches the return type of DOTU_K / DOTC_K and what
 * CREAL / CIMAG expect on every supported compiler.
 *
 * m        number of complex elements processed in b
 * a, lda   matrix storage and its leading dimension (in complex elements)
 * b, incb  vector and its stride; when incb != 1 the work is carried out on
 *          a unit-stride copy at the start of buffer and copied back at the
 *          end (both through COPY_K)
 * buffer   scratch space; the GEMV scratch area is buffer itself, or the
 *          first 4096-byte-aligned address at or past the end of the
 *          unit-stride copy when incb != 1
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG top, lo, blk_len, row;
#if (TRANSA == 2) || (TRANSA == 4)
  FLOAT _Complex dot;
#endif
#ifndef UNIT
  FLOAT ar, ai, br, bi, ratio, den;
#endif
  FLOAT *work = b;
  FLOAT *gemv_scratch = (FLOAT *)buffer;
  FLOAT *diag, *cur;

  if (incb != 1) {
    /* Work on a unit-stride copy; GEMV scratch follows it, page aligned. */
    work = buffer;
    gemv_scratch = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  top = m;
  while (top > 0) {

    blk_len = MIN(top, DTB_ENTRIES);

    /* The current block covers rows lo .. top - 1. */
    lo = top - blk_len;

#if TRANSA == 2
    if (m - top > 0) {
      GEMV_T(m - top, blk_len, 0, dm1, ZERO,
             a + (top + lo * lda) * COMPSIZE, lda,
             work + top * COMPSIZE, 1,
             work + lo * COMPSIZE, 1, gemv_scratch);
    }
#elif TRANSA == 4
    if (m - top > 0) {
      GEMV_C(m - top, blk_len, 0, dm1, ZERO,
             a + (top + lo * lda) * COMPSIZE, lda,
             work + top * COMPSIZE, 1,
             work + lo * COMPSIZE, 1, gemv_scratch);
    }
#endif

    for (row = top - 1; row >= lo; row--) {
      diag = a + (row + row * lda) * COMPSIZE;
      cur = work + row * COMPSIZE;

#if (TRANSA == 2) || (TRANSA == 4)
      if (row < top - 1) {
        /* Rows row + 1 .. top - 1 of this column against entries already done. */
#if TRANSA == 2
        dot = DOTU_K(top - 1 - row, diag + 2, 1, cur + 2, 1);
#else
        dot = DOTC_K(top - 1 - row, diag + 2, 1, cur + 2, 1);
#endif

        cur[0] -= CREAL(dot);
        cur[1] -= CIMAG(dot);
      }
#endif

#ifndef UNIT
      ar = diag[0];
      ai = diag[1];

      /*
       * Overwrite (ar, ai) with the reciprocal of the diagonal entry (or of
       * its conjugate for TRANSA >= 3), dividing by the component of larger
       * magnitude first.
       */
      if (fabs(ar) >= fabs(ai)) {
        ratio = ai / ar;
        den = 1./(ar * ( 1 + ratio * ratio));

        ar =  den;
#if TRANSA < 3
        ai = -ratio * den;
#else
        ai =  ratio * den;
#endif
      } else {
        ratio = ar / ai;
        den = 1./(ai * ( 1 + ratio * ratio));

        ar =  ratio * den;
#if TRANSA < 3
        ai = -den;
#else
        ai =  den;
#endif
      }

      br = cur[0];
      bi = cur[1];

      /* cur = (ar + i*ai) * cur */
      cur[0] = ar*br - ai*bi;
      cur[1] = ar*bi + ai*br;
#endif

#if TRANSA == 1
      if (row > lo) {
        /* Apply the finished entry to rows lo .. row - 1 of the block. */
        AXPYU_K(row - lo, 0, 0, - cur[0], -cur[1],
                diag - (row - lo) * COMPSIZE, 1,
                cur - (row - lo) * COMPSIZE, 1, NULL, 0);
      }
#elif TRANSA == 3
      if (row > lo) {
        /* Apply the finished entry to rows lo .. row - 1 of the block. */
        AXPYC_K(row - lo, 0, 0, - cur[0], -cur[1],
                diag - (row - lo) * COMPSIZE, 1,
                cur - (row - lo) * COMPSIZE, 1, NULL, 0);
      }
#endif
    }

#if TRANSA == 1
    if (lo > 0) {
      GEMV_N(lo, blk_len, 0, dm1, ZERO,
             a + lo * lda * COMPSIZE, lda,
             work + lo * COMPSIZE, 1,
             work, 1, gemv_scratch);
    }
#elif TRANSA == 3
    if (lo > 0) {
      GEMV_R(lo, blk_len, 0, dm1, ZERO,
             a + lo * lda * COMPSIZE, lda,
             work + lo * COMPSIZE, 1,
             work, 1, gemv_scratch);
    }
#endif

    top -= DTB_ENTRIES;
  }

  if (incb != 1) {
    /* Scatter the unit-stride result back into the caller's vector. */
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
Exemple #3
0
/*
 * Blocked update of the complex vector y from the matrix a and the complex
 * vector x, scaled by (alpha_r, alpha_i).  Only one triangle of a is read
 * outside the diagonal blocks: each off-diagonal panel is applied twice,
 * once through GEMV_N / GEMV_R and once through GEMV_C / GEMV_T, while each
 * diagonal block is first expanded into a dense min-sized square by one of
 * the ZHEMCOPY_* helpers and then applied with GEMV_N.
 * NOTE(review): this looks like the Hermitian matrix-vector product driver
 * (y += alpha * A * x); confirm against the build rules that define CNAME,
 * LOWER and HEMVREV.
 *
 * Build-time variants:
 *   LOWER     not defined: blocks cover rows m - offset .. m - 1 and use the
 *             panel above each diagonal block;
 *             defined: blocks cover rows 0 .. offset - 1 and use the panel
 *             below each diagonal block (down to row m - 1).
 *   HEMVREV   swaps GEMV_C/GEMV_N for GEMV_T/GEMV_R and selects the
 *             ZHEMCOPY_M / ZHEMCOPY_V packers instead of _L / _U.
 *
 * m            vector length in complex elements
 * offset       number of rows covered by this call (see LOWER above)
 * alpha_r/_i   real and imaginary parts of the scale factor passed to GEMV
 * a, lda       matrix storage and its leading dimension (complex elements)
 * x, incx      input vector and stride; packed through COPY_K if incx != 1
 * y, incy      updated vector and stride; packed through COPY_K if
 *              incy != 1 and copied back before returning
 * buffer       scratch space laid out as: SYMV_P x SYMV_P packed diagonal
 *              block, then (each starting at a 4096-byte-aligned address)
 *              the optional packed y, the optional packed x, and the GEMV
 *              scratch area
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i,
          FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx,
          FLOAT *y, BLASLONG incy, FLOAT *buffer){

  BLASLONG pos, blk_len, first, limit;
  FLOAT *xv = x;
  FLOAT *yv = y;
  FLOAT *diag_pack = buffer;
  /* Cursor into buffer: always the next free page-aligned scratch address. */
  FLOAT *scratch = (FLOAT *)(((BLASLONG)buffer + SYMV_P * SYMV_P * sizeof(FLOAT) * 2 + 4095) & ~4095);
  FLOAT *a_diag, *x_blk, *y_blk;

  if (incy != 1) {
    yv = scratch;
    scratch = (FLOAT *)(((BLASLONG)yv + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, y, incy, yv, 1);
  }

  if (incx != 1) {
    xv = scratch;
    scratch = (FLOAT *)(((BLASLONG)xv + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, x, incx, xv, 1);
  }

  /* Row range [first, limit) walked in steps of SYMV_P. */
#ifndef LOWER
  first = m - offset;
  limit = m;
#else
  first = 0;
  limit = offset;
#endif

  for (pos = first; pos < limit; pos += SYMV_P) {
    blk_len = MIN(limit - pos, SYMV_P);

    a_diag = a  + (pos + pos * lda) * 2;
    x_blk  = xv + pos * 2;
    y_blk  = yv + pos * 2;

#ifndef LOWER
    if (pos > 0) {
      /* Panel above the diagonal block: rows 0 .. pos - 1. */
      FLOAT *a_panel = a + pos * lda * 2;

#ifndef HEMVREV
      GEMV_C(pos, blk_len, 0, alpha_r, alpha_i,
             a_panel, lda,
             xv, 1,
             y_blk, 1, scratch);

      GEMV_N(pos, blk_len, 0, alpha_r, alpha_i,
             a_panel, lda,
             x_blk, 1,
             yv, 1, scratch);
#else
      GEMV_T(pos, blk_len, 0, alpha_r, alpha_i,
             a_panel, lda,
             xv, 1,
             y_blk, 1, scratch);

      GEMV_R(pos, blk_len, 0, alpha_r, alpha_i,
             a_panel, lda,
             x_blk, 1,
             yv, 1, scratch);
#endif
    }
#endif

    /* Expand the diagonal block into a dense blk_len x blk_len square. */
#if !defined(HEMVREV) && defined(LOWER)
    ZHEMCOPY_L(blk_len, a_diag, lda, diag_pack);
#elif !defined(HEMVREV)
    ZHEMCOPY_U(blk_len, a_diag, lda, diag_pack);
#elif defined(LOWER)
    ZHEMCOPY_M(blk_len, a_diag, lda, diag_pack);
#else
    ZHEMCOPY_V(blk_len, a_diag, lda, diag_pack);
#endif

    GEMV_N(blk_len, blk_len, 0, alpha_r, alpha_i,
           diag_pack, blk_len,
           x_blk, 1,
           y_blk, 1, scratch);

#ifdef LOWER
    if (m - pos - blk_len > 0) {
      /* Panel below the diagonal block: rows pos + blk_len .. m - 1. */
      BLASLONG rest    = m - pos - blk_len;
      FLOAT   *a_panel = a_diag + blk_len * 2;
      FLOAT   *x_rest  = x_blk  + blk_len * 2;
      FLOAT   *y_rest  = y_blk  + blk_len * 2;

#ifndef HEMVREV
      GEMV_C(rest, blk_len, 0, alpha_r, alpha_i,
             a_panel, lda,
             x_rest, 1,
             y_blk, 1, scratch);

      GEMV_N(rest, blk_len, 0, alpha_r, alpha_i,
             a_panel, lda,
             x_blk, 1,
             y_rest, 1, scratch);
#else
      GEMV_T(rest, blk_len, 0, alpha_r, alpha_i,
             a_panel, lda,
             x_rest, 1,
             y_blk, 1, scratch);

      GEMV_R(rest, blk_len, 0, alpha_r, alpha_i,
             a_panel, lda,
             x_blk, 1,
             y_rest, 1, scratch);
#endif
    }
#endif

  } /* end of block loop */

  if (incy != 1) {
    /* Scatter the packed result back into the caller's strided y. */
    COPY_K(m, yv, 1, y, incy);
  }

  return 0;
}