Beispiel #1
0
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {

  BLASLONG n, lda;
  FLOAT *a;

  FLOAT aii;
  BLASLONG i;

  n      = args -> n;
  a      = (FLOAT *)args -> a;
  lda    = args -> lda;

  if (range_n) {
    n      = range_n[1] - range_n[0];
    a     += range_n[0] * (lda + 1) * COMPSIZE;
  }

  for (i = 0; i < n; i++) {

    SCAL_K(i + 1, 0, 0, *(a + i + i * lda), a + i, lda, NULL, 0, NULL, 0);

    if (i < n - 1) {
      aii = DOTU_K(n - i - 1, a + i + 1 + i * lda, 1, a + i + 1 + i * lda, 1);

      *(a + i + i * lda) += aii;

      GEMV_T(n - i - 1, i, 0, dp1,
	     a + (i + 1)          , lda,
	     a + (i + 1) + i * lda, 1,
	     a + i                , lda, sb);
    }
  }

  return 0;
}
Beispiel #2
0
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *buffer){

  BLASLONG i, is, min_i;
  FLOAT *gemvbuffer = (FLOAT *)buffer;
  FLOAT *B = b;

  if (incb != 1) {
    B = buffer;
    gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  for (is = 0; is < m; is += DTB_ENTRIES){
    
    min_i = MIN(m - is, DTB_ENTRIES);

#ifndef TRANSA
    if (is > 0){
      GEMV_N(is, min_i, 0, dp1, 
	     a + is * lda,  lda,
	     B + is, 1,
	     B,      1, gemvbuffer);
    }
#endif

    for (i = 0; i < min_i; i++) {
      FLOAT *AA = a + is + (i + is) * lda;
      FLOAT *BB = B + is;
      
#ifndef TRANSA
      if (i > 0) AXPYU_K(i, 0, 0, BB[i], AA, 1, BB, 1, NULL, 0);
#endif

#ifndef UNIT
      BB[i] *= AA[i];
#endif

#ifdef TRANSA
      if (i < min_i - 1) BB[i] += DOTU_K(min_i - i - 1, AA + i + 1, 1, BB + i + 1, 1);
#endif
    }

#ifdef TRANSA
    if (m - is > min_i){
      GEMV_T(m - is - min_i, min_i, 0, dp1, 
	     a + is + min_i + is * lda,  lda,
	     B + is + min_i, 1,
	     B + is,         1, gemvbuffer);
  }
#endif

  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
Beispiel #3
0
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG i, is, min_i;
  FLOAT *gemvbuffer = (FLOAT *)buffer;
  FLOAT *B = b;

  if (incb != 1) {
    B = buffer;
    gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }
  
  for (is = m; is > 0; is -= DTB_ENTRIES){

    min_i = MIN(is, DTB_ENTRIES);

#ifdef TRANSA
    if (m - is > 0){
      GEMV_T(m - is, min_i, 0, dm1, 
	     a + is + (is - min_i) * lda, lda,
	     B + is, 1,
	     B + is - min_i, 1, gemvbuffer);
    }
#endif
    
    for (i = 0; i < min_i; i++) {
      FLOAT *AA = a + (is - i - 1) + (is - i - 1) * lda;
      FLOAT *BB = B + (is - i - 1);

#ifdef TRANSA
      if (i > 0) BB[0] -= DOTU_K(i, AA + 1, 1, BB + 1, 1);
#endif

#ifndef UNIT
      BB[0] /= AA[0];
#endif

#ifndef TRANSA
      if (i < min_i - 1) AXPYU_K(min_i - i - 1, 0, 0, -BB[0], AA - (min_i - i - 1), 1, BB - (min_i - i - 1), 1, NULL, 0);
#endif
    }

#ifndef TRANSA
    if (is - min_i > 0){
      GEMV_N(is - min_i, min_i, 0, dm1, 
	     a +  (is - min_i) * lda, lda,
	     B + is - min_i, 1,
	     B,              1, gemvbuffer);
    }
#endif
    
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
Beispiel #4
0
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

  BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
  FLOAT _Complex result;
#endif
#ifndef UNIT
  FLOAT ar, ai, br, bi, ratio, den;
#endif
  FLOAT *gemvbuffer = (FLOAT *)buffer;
  FLOAT *B = b;

  if (incb != 1) {
    B = buffer;
    gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, b, incb, buffer, 1);
  }

  for (is = m; is > 0; is -= DTB_ENTRIES){

    min_i = MIN(is, DTB_ENTRIES);

#if (TRANSA == 2) || (TRANSA == 4)
    if (m - is > 0){
#if TRANSA == 2
      GEMV_T(m - is, min_i, 0, dm1, ZERO,
	      a + (is + (is - min_i)  * lda) * COMPSIZE, lda,
	      B +  is          * COMPSIZE, 1,
	      B + (is - min_i) * COMPSIZE, 1, gemvbuffer);
#else
      GEMV_C(m - is, min_i, 0, dm1, ZERO,
	      a + (is + (is - min_i)  * lda) * COMPSIZE, lda,
	      B +  is            * COMPSIZE, 1,
	      B + (is - min_i)  * COMPSIZE, 1, gemvbuffer);
#endif
    }
#endif

    for (i = 0; i < min_i; i++) {
      FLOAT *AA = a + ((is - i - 1) + (is - i - 1) * lda) * COMPSIZE;
      FLOAT *BB = B + (is - i - 1) * COMPSIZE;

#if (TRANSA == 2) || (TRANSA == 4)
      if (i > 0) {
#if TRANSA == 2
	result = DOTU_K(i, AA + 2, 1, BB + 2, 1);
#else
	result = DOTC_K(i, AA + 2, 1, BB + 2, 1);
#endif

      BB[0] -= CREAL(result);
      BB[1] -= CIMAG(result);
      }
#endif

#ifndef UNIT
      ar = AA[0];
      ai = AA[1];
      
      if (fabs(ar) >= fabs(ai)){
	ratio = ai / ar;
	den = 1./(ar * ( 1 + ratio * ratio));
	
	ar =  den;
#if TRANSA < 3
	ai = -ratio * den;
#else
	ai =  ratio * den;
#endif
      } else {
	ratio = ar / ai;
	den = 1./(ai * ( 1 + ratio * ratio));
	ar =  ratio * den;
#if TRANSA < 3
	ai = -den;
#else
	ai =  den;
#endif
    }

      br = BB[0];
      bi = BB[1];
      
      BB[0] = ar*br - ai*bi;
      BB[1] = ar*bi + ai*br;
#endif

#if (TRANSA == 1) || (TRANSA == 3)
      if (i < min_i - 1) {
#if TRANSA == 1
	AXPYU_K (min_i - i - 1, 0, 0, - BB[0], -BB[1],
		 AA - (min_i - i - 1) * COMPSIZE, 1, BB - (min_i - i - 1) * COMPSIZE, 1, NULL, 0);
#else
	AXPYC_K(min_i - i - 1, 0, 0, - BB[0], -BB[1],
		 AA - (min_i - i - 1) * COMPSIZE, 1, BB - (min_i - i - 1) * COMPSIZE, 1, NULL, 0);
#endif
      }
#endif
    }

#if (TRANSA == 1) || (TRANSA == 3)
    if (is - min_i > 0){
#if   TRANSA == 1
      GEMV_N(is - min_i, min_i, 0, dm1, ZERO,
	      a +  (is - min_i) * lda * COMPSIZE, lda,
	      B +  (is - min_i)       * COMPSIZE, 1,
	      B,                                  1, gemvbuffer);
#else
      GEMV_R(is - min_i, min_i, 0, dm1, ZERO,
	      a +  (is - min_i) * lda * COMPSIZE, lda,
	      B +  (is - min_i)       * COMPSIZE, 1,
	      B,                                  1, gemvbuffer);
#endif
    }
#endif
  }

  if (incb != 1) {
    COPY_K(m, buffer, 1, b, incb);
  }

  return 0;
}
Beispiel #5
0
int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *buffer) {

    BLASLONG i, is, min_i;
#if (TRANSA == 2) || (TRANSA == 4)
    OPENBLAS_COMPLEX_FLOAT temp;
#endif
#ifndef UNIT
    FLOAT atemp1, atemp2, btemp1, btemp2;
#endif
    FLOAT *gemvbuffer = (FLOAT *)buffer;
    FLOAT *B = b;

    if (incb != 1) {
        B = buffer;
        gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
        COPY_K(m, b, incb, buffer, 1);
    }

    for (is =0; is < m; is += DTB_ENTRIES) {

        min_i = MIN(m - is, DTB_ENTRIES);

#if (TRANSA) == 1 || (TRANSA == 3)
        if (is > 0) {
#if   TRANSA == 1
            GEMV_N(is, min_i, 0, dp1, ZERO,
                   a + is * lda * 2, lda,
                   B + is       * 2, 1,
                   B,                1, gemvbuffer);
#else
            GEMV_R(is, min_i, 0, dp1, ZERO,
                   a + is * lda * 2, lda,
                   B + is       * 2, 1,
                   B,                1, gemvbuffer);
#endif
        }
#endif

        for (i = 0; i < min_i; i++) {
            FLOAT *AA = a + (is + (i + is) * lda) * 2;
            FLOAT *BB = B + is * 2;

#if (TRANSA == 1) || (TRANSA == 3)
#if   TRANSA == 1
            if (i > 0) AXPYU_K (i, 0, 0, BB[i * 2 + 0],  BB[i * 2 + 1],
                                    AA, 1, BB, 1, NULL, 0);
#else
            if (i > 0) AXPYC_K(i, 0, 0, BB[i * 2 + 0],  BB[i * 2 + 1],
                                   AA, 1, BB, 1, NULL, 0);
#endif
#endif

#ifndef UNIT
            atemp1 = AA[i * 2 + 0];
            atemp2 = AA[i * 2 + 1];

            btemp1 = BB[i * 2 + 0];
            btemp2 = BB[i * 2 + 1];

#if (TRANSA == 1) || (TRANSA == 2)
            BB[i * 2 + 0] = atemp1 * btemp1 - atemp2 * btemp2;
            BB[i * 2 + 1] = atemp1 * btemp2 + atemp2 * btemp1;
#else
            BB[i * 2 + 0] = atemp1 * btemp1 + atemp2 * btemp2;
            BB[i * 2 + 1] = atemp1 * btemp2 - atemp2 * btemp1;
#endif
#endif

#if (TRANSA == 2) || (TRANSA == 4)
            if (i < min_i - 1) {
#if TRANSA == 2
                temp = DOTU_K(min_i - i - 1,
                              AA + (i + 1) * 2, 1,
                              BB + (i + 1) * 2, 1);
#else
                temp = DOTC_K(min_i - i - 1,
                              AA + (i + 1) * 2, 1,
                              BB + (i + 1) * 2, 1);
#endif

                BB[i * 2 + 0] += CREAL(temp);
                BB[i * 2 + 1] += CIMAG(temp);
            }
#endif

        }

#if (TRANSA) == 2 || (TRANSA == 4)
        if (m - is > min_i) {
#if TRANSA == 2
            GEMV_T(m - is - min_i, min_i, 0, dp1, ZERO,
                   a + (is + min_i + is * lda) * 2,  lda,
                   B + (is + min_i) * 2, 1,
                   B +  is            * 2, 1, gemvbuffer);
#else
            GEMV_C(m - is - min_i, min_i, 0, dp1, ZERO,
                   a + (is + min_i + is * lda) * 2,  lda,
                   B + (is + min_i) * 2, 1,
                   B +  is            * 2, 1, gemvbuffer);
#endif
        }
#endif
    }

    if (incb != 1) {
        COPY_K(m, buffer, 1, b, incb);
    }

    return 0;
}
Beispiel #6
0
int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i,
	  FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){

  BLASLONG is, min_i;
  FLOAT *X = x;
  FLOAT *Y = y;
  FLOAT *symbuffer  = buffer;
  FLOAT *gemvbuffer = (FLOAT *)(((BLASLONG)buffer + SYMV_P * SYMV_P * sizeof(FLOAT) * 2 + 4095) & ~4095);
  FLOAT *bufferY    = gemvbuffer;
  FLOAT *bufferX    = gemvbuffer;

  if (incy != 1) {
    Y = bufferY;
    bufferX    = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    gemvbuffer = bufferX;
    COPY_K(m, y, incy, Y, 1);
  }

  if (incx != 1) {
    X = bufferX;
    gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
    COPY_K(m, x, incx, X, 1);
  }

#ifndef LOWER
  for(is = m - offset; is < m; is += SYMV_P){
    min_i = MIN(m - is, SYMV_P);
#else
  for(is = 0; is < offset; is += SYMV_P){
    min_i = MIN(offset - is, SYMV_P);
#endif
      
#ifndef LOWER
    if (is >0){
      GEMV_T(is, min_i, 0, alpha_r, alpha_i,
	      a + is * lda * COMPSIZE, lda,
	      X,                       1,
	      Y + is * COMPSIZE,       1, gemvbuffer);
      
      GEMV_N(is, min_i, 0, alpha_r, alpha_i,
	      a + is * lda  * COMPSIZE,  lda,
	      X + is * COMPSIZE, 1,
	      Y,                 1, gemvbuffer);
    }
#endif

#ifdef LOWER
    ZSYMCOPY_L(min_i, a + (is + is * lda) * COMPSIZE, lda, symbuffer);
#else
    ZSYMCOPY_U(min_i, a + (is + is * lda) * COMPSIZE, lda, symbuffer);
#endif
    
    GEMV_N(min_i, min_i, 0, alpha_r, alpha_i,
	    symbuffer, min_i,
	    X + is * COMPSIZE, 1,
	    Y + is * COMPSIZE, 1, gemvbuffer);
    

#ifdef LOWER
    if (m - is >  min_i){
      GEMV_T(m - is - min_i, min_i, 0, alpha_r, alpha_i,
	      a + ((is + min_i) + is * lda) * COMPSIZE, lda,
	      X + (is + min_i) * COMPSIZE, 1,
	      Y +  is           * COMPSIZE, 1, gemvbuffer);
      
      GEMV_N(m - is - min_i, min_i, 0, alpha_r, alpha_i,
	      a + ((is + min_i) + is * lda) * COMPSIZE, lda,
	      X +  is           * COMPSIZE, 1,
	      Y + (is + min_i) * COMPSIZE, 1, gemvbuffer);
    }
#endif

  } /* end of is */

  if (incy != 1) {
    COPY_K(m, Y, 1, y, incy);
  }

  return 0;
}