Esempio n. 1
0
sl_enddef

// Driver for the matrix-product kernel: checks the fixed dimensions, then
// creates a family of `cell` threads over the index range [0, 25*n) and
// waits for it to complete.
// ncores, CX_dim0, PX_dim0 are accepted but not read here.
sl_def(kernel21, void,
       sl_glparm(size_t, ncores),
       sl_glparm(size_t, n)
       , sl_glparm(const double*restrict, CX)
       , sl_glparm(size_t, CX_dim0)
       , sl_glparm(size_t, CX_dim1)
       , sl_glparm(double*restrict, PX)
       , sl_glparm(size_t, PX_dim0)
       , sl_glparm(size_t, PX_dim1)
       , sl_glparm(const double*restrict, VY)
       , sl_glparm(size_t, VY_dim0)
       , sl_glparm(size_t, VY_dim1)
    )
{
    // Only 25-wide operands are accepted.
    assert(sl_getp(PX_dim1) == 25);
    assert(sl_getp(CX_dim1) == 25);
    assert(sl_getp(VY_dim0) == 25);
    assert(sl_getp(VY_dim1) == 25);

    const size_t rows = sl_getp(n);
    const double *vy = sl_getp(VY);
    const double *cx = sl_getp(CX);
    double *px = sl_getp(PX);

    // One thread index per output element: 25 columns for each of the
    // `rows` rows.
    sl_create(,, 0, 25*rows, 1, 0,, cell,
              sl_glarg(size_t, , rows),
              sl_glarg(const double*, , vy),
              sl_glarg(const double*, , cx),
              sl_glarg(double*, , px));
    sl_sync();
}
Esempio n. 2
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 3
//        Inner Product
//---------------------------------

//---------------------------------
//         q = 0.0;
//         for ( k=0 ; k<n ; k++ )
//         {
//             q += z[k]*x[k];
// 	   }
//---------------------------------

#ifndef NAIVE
#define REDUCTIONS
#endif

// One term of the inner product: adds Z[i] * X[i] to the running sum that
// travels through the shared parameter Q.
sl_def(innerk3, void,
       sl_shfparm(double, Q),
       sl_glparm(const double*restrict, Z),
       sl_glparm(const double*restrict, X))
{
    sl_index(i);

    const double *z = sl_getp(Z);
    const double *x = sl_getp(X);

    // Kept as a single expression so the arithmetic matches q += z[k]*x[k].
    sl_setp(Q, (z[i] * x[i]) + sl_getp(Q));
}
Esempio n. 3
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 5
//        tri-diagonal
//   elimination, below diagonal
//---------------------------------

//---------------------------------
// for ( i=1 ; i<n ; i++ )
// {
//  x[i] = z[i]*( y[i] - x[i-1] );
// }
//---------------------------------

// One step of the recurrence x[i] = z[i] * (y[i] - x[i-1]); the previous
// element's value is received through (and the new one forwarded through)
// the shared parameter prevx.
sl_def(innerk5,void,
       sl_shfparm(double, prevx),
       sl_glparm(double*restrict, X),
       sl_glparm(const double*restrict, Y),
       sl_glparm(const double*restrict, Z))
{
    sl_index(i);

    const double *y = sl_getp(Y);
    const double *z = sl_getp(Z);

    const double xi = z[i] * (y[i] - sl_getp(prevx));

    // Forward the value first, then store it to memory.
    sl_setp(prevx, xi);
    sl_getp(X)[i] = xi;
}
// Copies element i of `source` into element i of `destination`
// (one element per thread index).
sl_def(copy_array, void,
       sl_glparm(INT*, destination),
       sl_glparm(INT*, source))
{
  sl_index(i);

  INT *to = sl_getp(destination);
  INT *from = sl_getp(source);
  to[i] = from[i];
}
Esempio n. 5
0
sl_enddef

#ifdef REDUCTIONS

// Reduction step for the inner-product kernel: thread `redindex` sums the
// products for indices [iternum*redindex, iternum*redindex + iternum) with
// an innerk3 family, then adds that partial sum to the shared total Q.
sl_def(reductionk3, void,
       sl_shfparm(double, Q),
       sl_glparm(const double*restrict, Z),
       sl_glparm(const double*restrict, X),
       sl_glparm(long, iternum))
{
    sl_index(redindex);

    const long chunk = sl_getp(iternum);
    const long first = chunk * redindex;
    const long last = first + chunk;

    // Partial sum over this thread's slice, starting from 0.0.
    sl_create(,PLACE_LOCAL, first, last, 1,,, innerk3,
              sl_shfarg(double, Qr, 0.0),
              sl_glarg(const double*, , sl_getp(Z)),
              sl_glarg(const double*, , sl_getp(X)));
    sl_sync();

    // now accumulate the results
    sl_setp(Q, sl_geta(Qr) + sl_getp(Q) );
}
Esempio n. 6
0
sl_enddef

// Copies the 32-bit word at index i from `src` to `dst`
// (one word per thread index).
sl_def(buf_copy, void,
       sl_glparm(const uint32_t*restrict, src),
       sl_glparm(uint32_t*restrict, dst))
{
  sl_index(i);

  const uint32_t *from = sl_getp(src);
  uint32_t *to = sl_getp(dst);
  to[i] = from[i];
}
Esempio n. 7
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 21
//    Matrix*Matrix Product
//---------------------------------

//---------------------------------
// 'Original' C:
// for ( k=0 ; k<25 ; k++ )
// {
//   for ( i=0 ; i<25 ; i++ )
//   {
//      for ( j=0 ; j<n ; j++ )
//      {
//          px[j][i] += vy[k][i] * cx[j][k];
//      }
//   }
// }
//---------------------------------

// Computes one element of PX. The thread index ij is split into a column
// (ij % 25) and a row (ij / 25); the element is then updated as
//   px[row][col] += sum over k in [0,25) of vy[k][col] * cx[row][k].
// All three matrices are addressed as row-major arrays with 25 columns.
sl_def(cell, void,
       sl_glparm(size_t, n),
       sl_glparm(const double*restrict, VY),
       sl_glparm(const double*restrict, CX),
       sl_glparm(double*restrict, PX))
{
    sl_index(ij);

    double *restrict px = sl_getp(PX);
    const double *restrict cx = sl_getp(CX);
    const double *restrict vy = sl_getp(VY);

    const long col = ij % 25;
    const long row = ij / 25;

    // Flat row-major offsets; the row count n is not needed to address
    // 25-wide rows.
    double *out = &px[row * 25 + col];
    const double *cx_row = &cx[row * 25];

    //N.B. the k loop could become a family of its own, but that would
    //require a reduction over the number of cores
    double acc = *out;
    for (long k = 0; k < 25; ++k)
        acc += vy[k * 25 + col] * cx_row[k];

    //save result
    *out = acc;
}
Esempio n. 8
0
// Produces the next term of the sequence: fibo[i] = prev + prev2, then
// forwards (prev2 <- prev, prev <- new term) through the shared parameters.
sl_def(fibo_compute, void,
       sl_shparm(INT, prev), sl_shparm(INT, prev2), sl_glparm(INT*, fibo))
{
  sl_index(i);

  INT last = sl_getp(prev);
  INT term = last + sl_getp(prev2);

  sl_setp(prev2, last);
  sl_setp(prev, term);
  sl_getp(fibo)[i] = term;
}
Esempio n. 9
0
// Records this thread's index in indices[count] and forwards count + 1.
// Once count has reached max, the count is forwarded unchanged and
// sl_break is issued instead.
sl_def(icount, void,
      sl_shparm(unsigned, count),
      sl_glparm(unsigned, max))
{
 sl_index(i);
 const unsigned seen = sl_getp(count);
 if (seen >= sl_getp(max)) {
     sl_setp(count, seen);
     sl_break ;
 }
 indices[seen] = i;
 sl_setp(count, seen + 1);
}
sl_enddef

/* Further partitions a list of intervals
 * level: level of the current list to be partitioned
 */
sl_def(partition_list_of_intervals, void,
       sl_glparm(INT*, data),
       sl_glparm(SIZE, len),
       sl_glparm(INT*, scratch),
       sl_shparm(INT, level),
       sl_shparm(INT, done)
      ) {
  sl_index(i);

  INT done = sl_getp(done);


  if (!done) {
    INT* d = sl_getp(data);
    SIZE l = sl_getp(len);
    INT level = sl_getp(level);
    INT* scratch = sl_getp(scratch);
    //printf("PARTIITON LIST: level = %d\n", level);

    num_intervals[(level+1)%2] = 0;

    // partition the intervals
    sl_create(,,0,num_intervals[level%2],,,,partition_interval,
        sl_glarg(INT*, gdata, d),
        sl_glarg(INT*, gres, scratch),
        sl_glarg(SIZE, level, level),
        sl_sharg(int, done, 1));
    sl_sync();

    // copy partitoned values back to d
    sl_create(,,0,l,,,,
        copy_array,
        sl_glarg(INT*, gdestination, d),
        sl_glarg(INT*, gsource, scratch));
    sl_sync();
    int j = 0;

    if (num_intervals[(level+1)%2] == 0) {
      // no intervals for the next level => we're done
      sl_setp(level, 0);  // value doesn't matter, just unblock sibling
      sl_setp(done, 1);
    } else {
      // trigger the next sibling to start
      sl_setp(level, level+1);
      sl_setp(done, 0);
    }
  } else {  // if (!done)
// Partitions interval number i of the current level around its first
// element (the pivot), writing the partitioned values into `result`, and
// appends the resulting sub-intervals to the interval list of the next
// level. The interval lists and their counters are double-buffered by level
// parity (level % 2 is read, (level + 1) % 2 is written).
//
// data:   input values for this level
// result: output buffer for the partitioned values
// level:  current level; selects which interval list is read/written
// done:   used by a thread to signal its right sibling when it finished
sl_def(partition_interval, void,
       sl_glparm(INT*, data),
       sl_glparm(INT*, result),
       sl_glparm(SIZE, level),
       sl_shparm(int, done)
       ) {
  sl_index(i);

  INT* data = sl_getp(data);
  INT* result = sl_getp(result);
  SIZE level = sl_getp(level);

  // bounds of the interval handled by this thread; l == r is treated below
  // as a one-element interval, so both ends appear to be inclusive
  int l = intervals[level%2][i].l;
  int r = intervals[level%2][i].r;

  if (l == r) {
    // this interval is sorted (1 element), so don't copy it to the
    // next level
    result[l] = data[l];
    // pass the token through unchanged so the right sibling is not held up
    sl_setp(done, sl_getp(done));
  } else {
    // Distribute every element except the pivot (data[l]): the workers see
    // the interval rebased to index 0, `lower` starts at the left end and
    // `greater` at the right end (r - l).
    // NOTE(review): presumably the family limit r - l + 1 is exclusive, so
    // worker indices are 1..r-l -- confirm against the SL create semantics.
    sl_create(,,1,r - l + 1,,,,
	      do_partition_interval,
	      sl_glarg(INT*, gdata, data + l),
	      sl_glarg(INT*, gres, result + l),
	      sl_sharg(SIZE, lower, 0),
	      sl_sharg(SIZE, greater, r - l));
    sl_sync();
    // final value of `lower` is the pivot's offset inside the interval;
    // adding l turns it into an absolute index
    SIZE la = sl_geta(lower);
    la = la + l;
    result[la] = data[l];  // put the pivot in the right place
    // copy the 2 new intervals to next level
    // but after the left sibling has finished doing the same
    int left_done = sl_getp(done);
    workaround += left_done;  // use the value, so the read doesn't get
                       // optimized away. See comment for workaround.

    // NOTE(review): l and r are int while la is SIZE; the comparisons below
    // mix the two types -- confirm SIZE's signedness makes this safe.
    // NOTE(review): the left interval recorded here is [l, la], i.e. it
    // still contains the pivot's slot la -- confirm this is intended.
    if (l < la) {  // don't copy an interval of len=1
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = l;
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = la;
      num_intervals[(level+1) % 2]++;
    }
    if (la + 1 < r) {  // don't copy an interval of len=1
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = la + 1;
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = r;
      num_intervals[(level+1) % 2]++;
    }

    //signal to the right sibling that I'm done
    sl_setp(done, 0);
  }
}
Esempio n. 12
0
sl_enddef

// Prints one line showing fibo[i] as the sum of fibo[i-2] and fibo[i-1].
// The shared `guard` value is read before printing and forwarded unchanged
// afterwards.
// NOTE(review): reads fibo[i - 2], so presumably the family starts at
// index 2 -- confirm with the caller.
// NOTE(review): the format uses %lu for INT values; assumes INT is
// unsigned long -- confirm.
sl_def(fibo_print, void,
       sl_shparm(INT, guard), sl_glparm(INT*, fibo))
{
  sl_index(i);

  // Fetch the three terms before waiting on the guard.
  INT older = sl_getp(fibo)[i - 2];
  INT newer = sl_getp(fibo)[i - 1];
  INT current = sl_getp(fibo)[i];

  INT token = sl_getp(guard);
  printf("The %luth Fibonacci number is %lu + %lu = %lu\n", (INT)i, older, newer, current);
  sl_setp(guard, token);
}
Esempio n. 13
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//----------------------------------------
//      LIVERMORE KERNEL 2
// Incomplete Cholesky Conjugate Gradient
//----------------------------------------

//---------------------------------
// ii = n;
// ipntp = 0;
// do
//  {
//   ipnt = ipntp;
//   ipntp += ii;
//   ii /= 2;
//   i = ipntp;
//   for ( k=ipnt+1 ; k<ipntp ; k=k+2 )
//     {
//       i++;
//       x[i] = x[k] - v[k]*x[k-1] - v[k+1]*x[k+1];
//     }
//  } while ( ii>0 );
//---------------------------------


// Inner step of kernel 2: for thread index i, with k = ipnt + i, stores
//   X[ipntp + i/2] = X[k] - V[k]*X[k-1] - V[k+1]*X[k+1].
sl_def(innerk2,void,
       sl_glparm(double*restrict, X),
       sl_glparm(const double*restrict, V),
       sl_glparm(unsigned long, ipnt),
       sl_glparm(unsigned long, ipntp))
{
    sl_index(i);

    double*restrict x = sl_getp(X);
    const double*restrict v = sl_getp(V);

    const unsigned long src_base = sl_getp(ipnt);
    const unsigned long dst_base = sl_getp(ipntp);

    const unsigned long k = src_base + i;       // source position
    const unsigned long out = dst_base + i / 2; // destination position

    x[out] = x[k] - v[k] * x[k-1] - v[k+1] * x[k+1];
}
Esempio n. 14
0
// Fills one slot of a large automatic array, passes the array to foo(), and
// forwards x + foo(array) - i through the shared parameter.
sl_def(bar, void, sl_shparm(int, x))
{
    sl_index(i);

    // NOTE(review): 90000-int automatic array; presumably deliberate to
    // exercise a large thread stack frame -- confirm.
    int big[90000];
    big[42] = 123;

    sl_setp(x, sl_getp(x) + foo(big) - i);
}
Esempio n. 15
0
File: crrec.c Progetto: knz/slcore
sl_def(foo,void,sl_glparm(int, x))
{
    if (sl_getp(x) > 0)
    {
        sl_create(,,,,,,,foo,sl_glarg(int,,sl_getp(x)-1));
        sl_sync();
    } else {
Esempio n. 16
0
// Prints the incoming value of the shared parameter s, then forwards s + 1.
sl_def(synch_thread, void, sl_shparm(int, s))
{
  const int seen = sl_getp(s);

  output_int(seen, 1);
  sl_setp(s, seen + 1);
}
Esempio n. 17
0
// Reads the token, calls bar(""), prints the thread index followed by a
// newline, and forwards the token unchanged.
sl_def(foo, void, sl_shparm(int, token))
{
    int held = sl_getp(token);
    (void)bar("");
    // The index declaration deliberately stays after the statements above.
    sl_index(i);
    output_int(i, 1);
    output_char('\n', 1);
    sl_setp(token, held);
}
Esempio n. 18
0
// Adds this thread's index to the running value carried by the shared
// parameter _s and forwards the sum.
sl_def (thread, void, sl_shparm(int, _s))
{
	sl_index(x);
	int sum = sl_getp(_s);
	sum = x + sum;
	sl_setp(_s, sum);
}
Esempio n. 19
0
File: sprec.c Progetto: knz/slcore
sl_def(foo,void,sl_glparm(int, x))
{
    if (sl_getp(x) > 0)
    {
        sl_spawndecl(f);
        sl_spawn(f,,,,,,,foo,sl_glarg(int,,sl_getp(x)-1));
        sl_spawnsync(f);
    } else {
sl_enddef

// Partitions an interval using data[0] as the pivot: thread i places
// element data[i] into `result`, either at the next free slot from the left
// (`lower`) or at the next free slot from the right (`greater`).
// After all threads run, the value of "lower" reported to the parent
// is the position where the pivot should stay.
//
// data:    interval to partition; data[0] is the pivot
// result:  output buffer, indexed like data
// lower:   shared cursor; next free slot for elements smaller than the pivot
// greater: shared cursor; next free slot for all other elements
sl_def(do_partition_interval, void,
       sl_glparm(INT*, data),
       sl_glparm(INT*, result),
       sl_shparm(SIZE, lower),
       sl_shparm(SIZE, greater)) {
  sl_index(i);

  INT* d = sl_getp(data);

  if (d[i] < d[0]) {
    // this thread does not use `greater`: forward it unchanged first
    sl_setp(greater, sl_getp(greater));

    // claim the current left slot and forward the advanced cursor before
    // doing the store
    SIZE l1 = sl_getp(lower);
    sl_setp(lower, l1+1);
    sl_getp(result)[l1] = d[i];
  } else {
    // this thread does not use `lower`: forward it unchanged first
    sl_setp(lower, sl_getp(lower));

    // claim the current right slot; the cursor moves towards the left
    SIZE l2 = sl_getp(greater);
    sl_setp(greater, l2-1);
    sl_getp(result)[l2] = d[i];
  }
}
Esempio n. 21
0
// Prints the character stored at the address given by the thread index
// itself; the shared token is read before the output and forwarded
// unchanged after it.
sl_def(do_print, void, sl_shparm(long, tok))
{
  sl_index(i);

  // The index value is reinterpreted as a character address.
  const char *where = (const char*)(void*)(long)i;
  char ch = *where;

  long held = sl_getp(tok);
  output_char(ch, 1);
  sl_setp(tok, held);
}
Esempio n. 22
0
// Forwards the shared floating-point value: unchanged for thread index 0,
// incremented by one for every other thread.
sl_def(foo, void, sl_shfparm(double, sarg))
{
  sl_index(i);
  double incoming = sl_getp(sarg);
  double copy = incoming;
  if (i != 0) {
      sl_setp(sarg, copy+1);
  } else {
      sl_setp(sarg, copy);
  }
}
Esempio n. 23
0
sl_enddef

// Prints "<count> <indices[count]>" on one line and forwards count + 1.
// Once count has reached refcount, the count is forwarded unchanged and
// sl_break is issued instead.
sl_def(iprint, int,
      sl_shparm(unsigned, count),
      sl_glparm(unsigned, refcount))
{
 sl_index(i);
 const unsigned slot = sl_getp(count);
 const unsigned limit = sl_getp(refcount);
 if (slot >= limit)
 {
     sl_setp(count, slot);
     sl_break ;
 }
 output_int(slot, 1);
 output_char(' ', 1);
 output_int(indices[slot], 1);
 output_char('\n', 1);
 sl_setp(count, slot + 1);
}
Esempio n. 24
0
sl_enddef

// One outer step of kernel 2. Receives the current length (ii) and the
// current start offset (ipntp) through shared parameters, forwards ii / 2
// immediately, runs the innerk2 family over indices 1, 3, 5, ... below ii,
// and finally forwards the new offset (old offset + ii).
sl_def(outerk2, void,
       sl_glparm(double*restrict, X),
       sl_glparm(const double*restrict, V),
       sl_shparm(unsigned long, ii),
       sl_shparm(unsigned long, ipntp))
{
  sl_index(m);

  const unsigned long span = sl_getp(ii);
  const unsigned long ipnt = sl_getp(ipntp);
  const unsigned long next_ipntp = span + ipnt;

  // The halved length does not depend on the inner family, so it is
  // forwarded before that family is created.
  sl_setp(ii, span/2);

  sl_create(,,1,span,2,,, innerk2,
            sl_glarg(double*restrict, , sl_getp(X)),
            sl_glarg(const double*restrict, , sl_getp(V)),
            sl_glarg(unsigned long, , ipnt),
            sl_glarg(unsigned long, , next_ipntp));
  sl_sync();

  // The offset is forwarded only after the inner family has completed.
  sl_setp(ipntp, next_ipntp);
}
Esempio n. 25
0
// One round (thread index i) of a five-word hash state update; the round
// constants and the 20/40/60 split look like the SHA-1 compression function
// -- NOTE(review): confirm against the caller.
//
// w:          message schedule; w[i] is added in round i
// a..e:       state words, received from and forwarded through shared
//             parameters. They are declared unsigned long but handled as
//             32-bit values in the locals below.
//
// Each state word is forwarded as soon as its new value is known
// (e <- d, d <- c, c <- ROL32(b, 30), b <- a); only the new `a` needs the
// full round computation and is forwarded last.
sl_def(sha_main_inner, void,
       sl_glparm(const uint32_t*restrict, w),
       sl_shparm(unsigned long, a),
       sl_shparm(unsigned long, b),
       sl_shparm(unsigned long, c),
       sl_shparm(unsigned long, d),
       sl_shparm(unsigned long, e))
{
  sl_index(i);
  uint32_t d = sl_getp(d);
  uint32_t e = sl_getp(e);
  sl_setp(e, d);
  uint32_t c = sl_getp(c);
  sl_setp(d, c);
  uint32_t b = sl_getp(b);
  sl_setp(c, ROL32(b, 30));
  uint32_t a = sl_getp(a);
  sl_setp(b, a);
  // common part of the new `a`; the round function and constant are added
  // below. The locals still hold the incoming (pre-round) values.
  uint32_t tmp = ROL32(a, 5) + e + sl_getp(w)[i];
  if (i < 20) {
    // rounds 0..19: bitwise select (b chooses between c and d)
    tmp += (b & c) | ((~b) & d);
    tmp += 0x5A827999L;
  } else if (i < 40) {
    // rounds 20..39: parity
    tmp += (b ^ c ^ d);
    tmp += 0x6ED9EBA1L;
  } else if (i < 60) {
    // rounds 40..59: majority
    tmp += (b & c) | (b & d) | (c & d);
    tmp += 0x8F1BBCDCL;
  } else {
    // remaining rounds: parity again, different constant
    tmp += (b ^ c ^ d);
    tmp += 0xCA62C1D6L;
  }
  sl_setp(a, tmp);
}
Esempio n. 26
0
// Writes x using the entries of roman_table: a leading '-' for negative
// values, then for each table entry in order (until an entry whose base is
// zero) its representation repeated while the remaining value is at least
// that entry's base.
sl_def(roman, void, sl_glparm(short, x))
{
  long remaining = sl_getp(x);

  if (unlikely(remaining < 0)) {
    output_char('-', 1);
    remaining = -remaining;
  }

  for (struct roman_table_t *entry = roman_table; entry->base; ++entry) {
    while (likely(remaining >= entry->base)) {
      const char *glyph = entry->repr;
      while (*glyph) {
        output_char(*glyph, 1);
        ++glyph;
      }
      remaining = remaining - entry->base;
    }
  }
}
Esempio n. 27
0
sl_enddef


/**
GOL
**/
sl_def(gol,void,sl_shparm(int,breaked))
{

	sl_index(index);
	int flag = sl_getp(breaked);
		
	if(flag==0)
	{	
		//info_print("GOL Thread %d :Processing %d blocks\n", index, b_queue->elements);
		//printf("GOL Thread %d :Processing %d blocks\n", index, b_queue->elements);
		//info_print("%d\n",iter);
		if( index + 1 == cycle)
			flag = 1;
		debug_print("Creating Worker family...\n");
		sl_create(,,,,0,block_size,,run,sl_glarg(int,iteration,index),sl_sharg(int,sta,0));
		debug_print("Waiting for sync...\n");
		sl_sync();
        
		debug_print("Workers finished, processing request queue...\n");
		sl_create(,,,,0,block_size,,process_requests,sl_sharg(int,stat,0));
		debug_print("Waiting for sync...\n");
		sl_sync();
  		
		debug_print("Request queue processing finished, traversing...\n");
		
		b_queue->elements = 0;
	
		sl_create(,,,,0,block_size,,traverse,sl_sharg(int,state,1),sl_sharg(struct hashtable_itr*,itr,hashtable_iterator(table)));
		debug_print("Waiting for sync...\n");
		sl_sync();			
		
		
		sl_setp(breaked,flag);
		
		if(flag == 1)
			sl_break;				
	}	
	else
	{
Esempio n. 28
0
sl_enddef

#include <math.h>

// Driver for kernel 2: creates the outerk2 family, seeded with length n and
// offset 0, and waits for it to finish.
//
// Parameters read here: n (initial length), X (passed to the workers as a
// writable array) and V (passed as a read-only array). ncores, V_dim and
// X_dim are accepted but not read.
sl_def(kernel2, void,
       sl_glparm(size_t, ncores),
       sl_glparm(size_t, n),
       sl_glparm(const double*restrict, V),
       sl_glparm(size_t, V_dim),
       sl_glparm(double*restrict, X),
       sl_glparm(size_t, X_dim))
{
    // First outer index = floor(log2(n)), i.e. the number of times n can be
    // halved before reaching 1. It is counted with integer shifts instead of
    // calling log2(): a floating-point result marginally below an exact
    // integer would truncate to one step too few, and log2(0) is -infinity,
    // whose conversion to an unsigned integer is undefined.
    // n == 0 and n == 1 both give 0 here.
    unsigned long upper = 0;
    for (size_t rest = sl_getp(n); rest > 1; rest >>= 1)
        ++upper;

    // Outer family counts down from `upper` with step -1 towards limit -1.
    // NOTE(review): presumably the limit is exclusive, giving upper + 1
    // outer steps -- confirm against the SL create semantics.
    sl_create(,,upper,-1,-1,2,, outerk2,
              sl_glarg(double*restrict, , sl_getp(X)),
              sl_glarg(const double*restrict, , sl_getp(V)),
              sl_sharg(unsigned long, ii, sl_getp(n)),
              sl_sharg(unsigned long, ipntp, 0));
    sl_sync();
}
Esempio n. 29
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 1
//        Hydro Fragment
//---------------------------------

//---------------------------------
// 'Original' C:
//        for ( k=0 ; k<n ; k++ ) {
//            x[k] = q + y[k]*( r*z[k+10] + t*z[k+11] );
//        }
//---------------------------------

// Worker family for kernel 1: each thread computes one element,
//   X[i] = Q + Y[i] * (R * ZX[i+10] + T * ZX[i+11]).
sl_def(innerk1, void,
       sl_glparm(double*restrict, X),
       sl_glfparm(double, Q),
       sl_glparm(const double*restrict, Y),
       sl_glfparm(double, R),
       sl_glparm(const double*restrict, ZX),
       sl_glfparm(double, T) )
{
    sl_index(i);

    double *restrict x = sl_getp(X);
    const double *restrict y = sl_getp(Y);
    const double *restrict zx = sl_getp(ZX);

    const double q = sl_getp(Q);
    const double r = sl_getp(R);
    const double t = sl_getp(T);

    // Same expression shape as the reference loop body.
    x[i] = q + y[i] *
        ( r * zx[i+10]
          + t * zx[i+11] );
}
Esempio n. 30
0
// Increments the value carried by the shared parameter a and forwards it.
sl_def(foo, void, sl_shparm(int, a))
{
  int bumped = sl_getp(a) + 1;
  sl_setp(a, bumped);
}