Exemplo n.º 1
0
// Tensors must be sorted from most to least significant dimension.
// nc is the number of contracted indices, and
// ind is an array of (nx by 2) integers (pairs of contracted indices).
Tensor *tensdot(Tensor *a, Tensor *b, int nc, int *ind, int nref, Slice *m) {
    int i = 0;
    int ord_a, ord_b;
    int *anum, *bnum;
    int an, bn;
    Tensor *t;

#ifdef PARANOID
    if(nc > a->n || nc > b->n) {
        fprintf(stderr, "tensdor: more contracted than input dims.\n");
        exit(1);
    }
#endif
    anum = number_indices(a->n, nc, ind);
    bnum = number_indices(b->n, nc, ind+1);

    t = tensor_ctor(a->n + b->n - 2*nc);
    an = copy_shape(a, anum, t->shape);
    bn = copy_shape(b, bnum, t->shape + a->n - nc);
    t->len = an*bn;
    t->b = reserve_block(m, nref, t->len*sizeof(float));

    if(nc == 0) { // trivial case A <- a X Y^T + A, A m x n
        GER(bn, an, 1.0, b->b->x, 1, a->b->x, 1,
                t->b->x, an);
        //GER(bn, an, a->scale*b->scale, b->b->x, 1, a->b->x, 1,
        //        t->b->x, an);
        free(anum); free(bnum);
        return t;
    }

    if( (ord_a = ck_ordered(a->n, anum))) {
        if( (ord_b = ck_ordered(b->n, bnum))
                && ck_same(nc, ind)) { // Straightforward dgemm.
            GEMM(ord_b == 1, ord_a == -1, bn, an, a->len / an,
                    1.0, a->b->x, an, b->b->x, bn,
                    0.0, t->b->x, bn);
            //GEMM(ord_b == 1, ord_a == -1, bn, an, a->len / an,
            //      a->scale*b->scale, a->b->x, an, b->b->x, bn,
            //      0.0, t->b->x, bn);
        // TODO: decide if transposing A would give better perf.
        } else { // Need to transpose B.
            printf("Need to transpose B.\n");
        }
    } else if( (ord_b = ck_ordered(b->n, bnum))) { // Need to transpose A
            printf("Need to transpose A.\n");
    } else { // Need to transpose both A and B.
            printf("Need to transpose A & B.\n");
    }

    free(anum);
    free(bnum);
    return t;
}
Exemplo n.º 2
0
Arquivo: ger.c Projeto: 4ker/OpenBLAS
int main(int argc, char *argv[]){

  FLOAT *a, *x, *y;
  FLOAT alpha[] = {1.0, 1.0};
  blasint m, i, j;
  blasint inc_x=1,inc_y=1;
  blasint n=0;
  int has_param_n = 0;
  int loops = 1;
  int l;
  char *p;

  int from =   1;
  int to   = 200;
  int step =   1;

  struct timeval start, stop;
  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
  if ((p = getenv("OPENBLAS_PARAM_N"))) {
	  n = atoi(p);
	  if ((n>0) && (n<=to)) has_param_n = 1;
  }

  if ( has_param_n == 1 )
    fprintf(stderr, "From : %3d  To : %3d Step = %3d N = %d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,n,inc_x,inc_y,loops);
  else
    fprintf(stderr, "From : %3d  To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  srandom(getpid());
#endif

  fprintf(stderr, "   SIZE       Flops\n");

  for(m = from; m <= to; m += step)
  {

   timeg=0;

   if ( has_param_n == 0 ) n = m;

   fprintf(stderr, " %6dx%d : ", (int)m,(int)n);

   for(j = 0; j < m; j++){
      		for(i = 0; i < n * COMPSIZE; i++){
			a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      		}
   }


   for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
			x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
   }

   for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
			y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
   }

    for (l=0; l<loops; l++)
    {

    	gettimeofday( &start, (struct timezone *)0);

    	GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);

    	gettimeofday( &stop, (struct timezone *)0);

    	time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

	timeg += time1;

    }

    timeg /= loops;

    fprintf(stderr,
	    " %10.2f MFlops\n",
	    COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);

  }

  return 0;
}
Exemplo n.º 3
0
void NAME(blasint *M, blasint *N, FLOAT *Alpha,
	  FLOAT *x, blasint *INCX,
	  FLOAT *y, blasint *INCY,
	  FLOAT *a, blasint *LDA){

  blasint    m     = *M;
  blasint    n     = *N;
  FLOAT  alpha = *Alpha;
  blasint    incx  = *INCX;
  blasint    incy  = *INCY;
  blasint    lda   = *LDA;
  FLOAT *buffer;
#ifdef SMP
  int nthreads;
#endif

  blasint info;

  PRINT_DEBUG_NAME;

  info = 0;

  if (lda < MAX(1,m)) info = 9;
  if (incy == 0)      info = 7;
  if (incx == 0)      info = 5;
  if (n < 0)          info = 2;
  if (m < 0)          info = 1;

  if (info){
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#else

void CNAME(enum CBLAS_ORDER order,
	   blasint m, blasint n,
	   FLOAT alpha,
	   FLOAT  *x, blasint incx,
	   FLOAT  *y, blasint incy,
	   FLOAT  *a, blasint lda) {

  FLOAT *buffer;
  blasint info, t;
#ifdef SMP
  int nthreads;
#endif

  PRINT_DEBUG_CNAME;

  info  =  0;

  if (order == CblasColMajor) {
    info = -1;

    if (lda < MAX(1,m)) info = 9;
    if (incy == 0)      info = 7;
    if (incx == 0)      info = 5;
    if (n < 0)          info = 2;
    if (m < 0)          info = 1;
  }

  if (order == CblasRowMajor) {
    info = -1;

    t = n;
    n = m;
    m = t;

    t    = incx;
    incx = incy;
    incy = t;

    buffer = x;
    x = y;
    y = buffer;

    if (lda < MAX(1,m)) info = 9;
    if (incy == 0)      info = 7;
    if (incx == 0)      info = 5;
    if (n < 0)          info = 2;
    if (m < 0)          info = 1;
  }

  if (info >= 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return;
  }

#endif

  /*     Quick return if possible. */
  if (m == 0 || n == 0) return;
  if (alpha == 0.) return;
  
  IDEBUG_START;

  FUNCTION_PROFILE_START();

  if (incy < 0) y -= (n - 1) * incy;
  if (incx < 0) x -= (m - 1) * incx;

  buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
  nthreads = num_cpu_avail(2);

  if (nthreads == 1) {
#endif

    GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);

#ifdef SMP
  } else {
    
    GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads);
    
  }
#endif

  blas_memory_free(buffer);

  FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);

  IDEBUG_END;

  return;
}