Example #1
0
// Emits a single character through output_char while passing the shared
// token along.  The thread index is reinterpreted as the address of the
// character to emit.
sl_def(do_print, void, sl_shparm(long, tok))
{
  sl_index(i);
  // fetch the character before touching the token
  const char ch = *(const char*)(void*)(long)i;
  // NOTE(review): reading the token presumably waits for the previous
  // thread in the family, which would keep the output in order - confirm
  const long token = sl_getp(tok);
  output_char(ch, 1);
  // forward the token unchanged
  sl_setp(tok, token);
}
Example #2
0
sl_enddef

// Copies one 32-bit word: element i of src is stored into element i of dst,
// where i is the thread index.
sl_def(buf_copy, void,
       sl_glparm(const uint32_t*restrict, src),
       sl_glparm(uint32_t*restrict, dst))
{
  sl_index(i);
  const uint32_t *restrict from = sl_getp(src);
  uint32_t *restrict to = sl_getp(dst);
  to[i] = from[i];
}
Example #3
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 21
//    Matrix*Matrix Product
//---------------------------------

//---------------------------------
// 'Original' C:
// for ( k=0 ; k<25 ; k++ )
// {
//   for ( i=0 ; i<25 ; i++ )
//   {
//      for ( j=0 ; j<n ; j++ )
//      {
//          px[j][i] += vy[k][i] * cx[j][k];
//      }
//   }
// }
//---------------------------------

//cell to calculate is computed by taking the remainder
//and result of dividing the thread index by the matrix
//width.
sl_def(cell, void,
       sl_glparm(size_t, n),
       sl_glparm(const double*restrict, VY),
       sl_glparm(const double*restrict, CX),
       sl_glparm(double*restrict, PX))
{
    sl_index(ij);

    // every matrix is addressed as rows of WIDTH doubles; the n parameter
    // only sized the outer dimension and is not needed for indexing
    enum { WIDTH = 25 };
    double (*restrict px)[WIDTH] = (double (*)[WIDTH])(double*)sl_getp(PX);
    const double (*restrict cx)[WIDTH] = (const double (*)[WIDTH])(const double*)sl_getp(CX);
    const double (*restrict vy)[WIDTH] = (const double (*)[WIDTH])(const double*)sl_getp(VY);

    // split the flat thread index: remainder selects i, quotient selects j
    const long i = ij % WIDTH;
    const long j = ij / WIDTH;

    // N.B. the k loop could become a family of its own,
    // but that would require a reduction over the number of cores
    double acc = px[j][i];
    for (long k = 0; k < WIDTH; ++k)
        acc += vy[k][i] * cx[j][k];

    // write the accumulated value back
    px[j][i] = acc;
}
Example #4
0
// Passes the shared floating-point value along the family: the thread with
// index 0 forwards it unchanged, every other thread forwards it plus one.
sl_def(foo, void, sl_shfparm(double, sarg))
{
  sl_index(i);
  const double incoming = sl_getp(sarg);
  double outgoing = incoming;
  if (i != 0)
      outgoing = incoming + 1;
  sl_setp(sarg, outgoing);
}
Example #5
0
// One step of the Fibonacci recurrence: adds the two shared predecessors,
// shifts them along (prev2 receives the old prev, prev receives the sum)
// and records the sum in fibo at the thread index.
sl_def(fibo_compute, void,
       sl_shparm(INT, prev), sl_shparm(INT, prev2), sl_glparm(INT*, fibo))
{
  sl_index(i);

  const INT last = sl_getp(prev);
  const INT before_last = sl_getp(prev2);
  const INT sum = last + before_last;

  sl_setp(prev2, last);
  sl_setp(prev, sum);
  sl_getp(fibo)[i] = sum;
}
Example #6
0
// Records the thread index in the indices table at the position given by
// the shared count, then passes count + 1 on.  Once count has reached max
// the count is forwarded unchanged and a break is requested.
sl_def(icount, void,
      sl_shparm(unsigned, count),
      sl_glparm(unsigned, max))
{
 sl_index(i);
 const unsigned seen = sl_getp(count);
 const unsigned limit = sl_getp(max);
 if (seen >= limit) {
     // limit reached: forward the count as received and request a break
     sl_setp(count, seen);
     sl_break;
 }
 indices[seen] = i;
 sl_setp(count, seen + 1);
}
// Partitions the i-th interval of the current level and queues the
// resulting sub-intervals for the next level.
//   data   : values to partition (read)
//   result : destination for the partitioned values (written)
//   level  : current level; level%2 selects which of the two interval
//            tables is read, (level+1)%2 selects the one that is filled
//   done   : used by a thread to signal its right sibling when it finished
sl_def(partition_interval, void,
       sl_glparm(INT*, data),
       sl_glparm(INT*, result),
       sl_glparm(SIZE, level),
       sl_shparm(int, done)
       ) {
  sl_index(i);

  INT* data = sl_getp(data);
  INT* result = sl_getp(result);
  SIZE level = sl_getp(level);

  // bounds of the interval handled by this thread; l == r means one element
  int l = intervals[level%2][i].l;
  int r = intervals[level%2][i].r;

  if (l == r) {
    // this interval is sorted (1 element), so don't copy it to the
    // next level
    result[l] = data[l];
    // forward the value received from the left sibling unchanged
    sl_setp(done, sl_getp(done));
  } else {
    // NOTE(review): do_partition_interval is defined elsewhere; presumably
    // it distributes the elements after data[l] around the pivot data[l]
    // and leaves the pivot offset in the shared argument lower - confirm
    sl_create(,,1,r - l + 1,,,,
	      do_partition_interval,
	      sl_glarg(INT*, gdata, data + l),
	      sl_glarg(INT*, gres, result + l),
	      sl_sharg(SIZE, lower, 0),
	      sl_sharg(SIZE, greater, r - l));
    sl_sync();
    // final value of lower is relative to l; make it an absolute position
    SIZE la = sl_geta(lower);
    la = la + l;
    result[la] = data[l];  // put the pivot in the right place
    // copy the 2 new intervals to next level
    // but only after the left sibling has finished doing the same
    int left_done = sl_getp(done);
    workaround += left_done;  // use the value, so the read doesn't get
                       // optimized away. See comment for workaround.

    // append the interval from l to la to the next level's table
    if (l < la) {  // don't copy an interval of len=1
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = l;
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = la;
      num_intervals[(level+1) % 2]++;
    }
    // append the interval from la + 1 to r to the next level's table
    if (la + 1 < r) {  // don't copy an interval of len=1
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = la + 1;
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = r;
      num_intervals[(level+1) % 2]++;
    }

    //signal to the right sibling that I'm done
    sl_setp(done, 0);
  }
}
sl_enddef

/* Further partitions a list of intervals
 * level: level of the current list to be partitioned
 */
sl_def(partition_list_of_intervals, void,
       sl_glparm(INT*, data),
       sl_glparm(SIZE, len),
       sl_glparm(INT*, scratch),
       sl_shparm(INT, level),
       sl_shparm(INT, done)
      ) {
  sl_index(i);

  INT done = sl_getp(done);


  if (!done) {
    INT* d = sl_getp(data);
    SIZE l = sl_getp(len);
    INT level = sl_getp(level);
    INT* scratch = sl_getp(scratch);
    //printf("PARTIITON LIST: level = %d\n", level);

    num_intervals[(level+1)%2] = 0;

    // partition the intervals
    sl_create(,,0,num_intervals[level%2],,,,partition_interval,
        sl_glarg(INT*, gdata, d),
        sl_glarg(INT*, gres, scratch),
        sl_glarg(SIZE, level, level),
        sl_sharg(int, done, 1));
    sl_sync();

    // copy partitioned values back to d
    sl_create(,,0,l,,,,
        copy_array,
        sl_glarg(INT*, gdestination, d),
        sl_glarg(INT*, gsource, scratch));
    sl_sync();
    int j = 0;

    if (num_intervals[(level+1)%2] == 0) {
      // no intervals for the next level => we're done
      sl_setp(level, 0);  // value doesn't matter, just unblock sibling
      sl_setp(done, 1);
    } else {
      // trigger the next sibling to start
      sl_setp(level, level+1);
      sl_setp(done, 0);
    }
  } else {  // if (!done)
Example #9
0
sl_enddef

// Prints the Fibonacci entry at the thread index together with its two
// predecessors.  The three values are loaded first; the shared guard is
// then read before printing and written back unchanged afterwards.
sl_def(fibo_print, void,
       sl_shparm(INT, guard), sl_glparm(INT*, fibo))
{
  sl_index(i);

  // three consecutive entries ending at index i
  const INT *window = sl_getp(fibo) + (i - 2);
  const INT first = window[0];
  const INT second = window[1];
  const INT total = window[2];

  const INT token = sl_getp(guard);
  printf("The %luth Fibonacci number is %lu + %lu = %lu\n", (INT)i, first, second, total);
  sl_setp(guard, token);
}
Example #10
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//----------------------------------------
//      LIVERMORE KERNEL 2
// Incomplete Cholesky Conjugate Gradient
//----------------------------------------

//---------------------------------
// ii = n;
// ipntp = 0;
// do
//  {
//   ipnt = ipntp;
//   ipntp += ii;
//   ii /= 2;
//   i = ipntp;
//   for ( k=ipnt+1 ; k<ipntp ; k=k+2 )
//     {
//       i++;
//       x[i] = x[k] - v[k]*x[k-1] - v[k+1]*x[k+1];
//     }
//  } while ( ii>0 );
//---------------------------------


sl_def(innerk2,void,
       sl_glparm(double*restrict, X),
       sl_glparm(const double*restrict, V),
       sl_glparm(unsigned long, ipnt),
       sl_glparm(unsigned long, ipntp))
{
    sl_index(i);

    // one iteration of the inner loop of kernel 2:
    // the source position is ipnt plus the thread index,
    // the destination is ipntp plus half the thread index
    double*restrict x = sl_getp(X);
    const double*restrict v = sl_getp(V);
    const unsigned long base = sl_getp(ipnt);
    const unsigned long next_base = sl_getp(ipntp);
    const unsigned long k = base + i;
    const unsigned long dest = next_base + i / 2;

    x[dest] = x[k] - v[k] * x[k-1] - v[k+1] * x[k+1];
}
Example #11
0
sl_enddef

// Processes the part of the input that starts offset_base words into it,
// where offset_base is the thread index, and folds the result into the
// five shared words h0..h4.
//   input  : base of the message words
//   h0..h4 : shared state; each is read, and written back as the received
//            value plus the corresponding result of the inner family
sl_def(sha_main_outer, void,
       sl_glparm(const uint32_t*restrict, input),
       sl_shparm(unsigned long, h0),
       sl_shparm(unsigned long, h1),
       sl_shparm(unsigned long, h2),
       sl_shparm(unsigned long, h3),
       sl_shparm(unsigned long, h4))
{
  sl_index(offset_base);
  int i;
  // words handled by this thread start at offset_base
  const uint32_t*restrict input = sl_getp(input) + offset_base;

  /* word extension: not easily made concurrent! */
  uint32_t w[80];
  // fill the first 16 entries of w from the input via a buf_copy family
  sl_create(,PLACE_LOCAL,,16,,,, buf_copy,
	    sl_glarg(const uint32_t*restrict, src, input),
	    sl_glarg(uint32_t*restrict, dst, w));
  sl_sync();
  //  for (i = 0; i < 16; ++i) w[i] = input[i];
  // each remaining entry depends on four earlier entries of w
  // NOTE(review): ROL32 is defined elsewhere; presumably a 32-bit rotate
  // left - confirm
  for (i = 16; i < 80; ++i) {
    uint32_t x = w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16];
    w[i] = ROL32(x, 1);
  }

  // run the inner family over the 80 words; its five shared arguments are
  // given their initial values right after creation, from the current h0..h4
  sl_create(,,,80,,,, sha_main_inner,
	    sl_glarg(const uint32_t*restrict, wg, w),
	    sl_sharg(unsigned long, a),
	    sl_sharg(unsigned long, b),
	    sl_sharg(unsigned long, c),
	    sl_sharg(unsigned long, d),
	    sl_sharg(unsigned long, e));
  sl_seta(a, sl_getp(h0));
  sl_seta(b, sl_getp(h1));
  sl_seta(c, sl_getp(h2));
  sl_seta(d, sl_getp(h3));
  sl_seta(e, sl_getp(h4));
  sl_sync();
  // add the final values of the inner family to the received state and
  // pass the sums on
  sl_setp(h0, sl_getp(h0) + sl_geta(a));
  sl_setp(h1, sl_getp(h1) + sl_geta(b));
  sl_setp(h2, sl_getp(h2) + sl_geta(c));
  sl_setp(h3, sl_getp(h3) + sl_geta(d));
  sl_setp(h4, sl_getp(h4) + sl_geta(e));
}
Example #12
0
sl_enddef


/**
GOL
**/
sl_def(gol,void,sl_shparm(int,breaked))
{

	sl_index(index);
	int flag = sl_getp(breaked);
		
	if(flag==0)
	{	
		//info_print("GOL Thread %d :Processing %d blocks\n", index, b_queue->elements);
		//printf("GOL Thread %d :Processing %d blocks\n", index, b_queue->elements);
		//info_print("%d\n",iter);
		if( index + 1 == cycle)
			flag = 1;
		debug_print("Creating Worker family...\n");
		sl_create(,,,,0,block_size,,run,sl_glarg(int,iteration,index),sl_sharg(int,sta,0));
		debug_print("Waiting for sync...\n");
		sl_sync();
        
		debug_print("Workers finished, processing request queue...\n");
		sl_create(,,,,0,block_size,,process_requests,sl_sharg(int,stat,0));
		debug_print("Waiting for sync...\n");
		sl_sync();
  		
		debug_print("Request queue processing finished, traversing...\n");
		
		b_queue->elements = 0;
	
		sl_create(,,,,0,block_size,,traverse,sl_sharg(int,state,1),sl_sharg(struct hashtable_itr*,itr,hashtable_iterator(table)));
		debug_print("Waiting for sync...\n");
		sl_sync();			
		
		
		sl_setp(breaked,flag);
		
		if(flag == 1)
			sl_break;				
	}	
	else
	{
Example #13
0
sl_enddef

// Prints one line made of the shared count, a space and the indices entry
// at that position, then passes count + 1 on.  Once count has reached
// refcount the count is forwarded unchanged and a break is requested.
sl_def(iprint, int,
      sl_shparm(unsigned, count),
      sl_glparm(unsigned, refcount))
{
 sl_index(i);
 const unsigned pos = sl_getp(count);
 const unsigned limit = sl_getp(refcount);
 if (pos >= limit) {
     // nothing left to print: forward the count as received
     sl_setp(count, pos);
     sl_break;
 }
 output_int(pos, 1);
 output_char(' ', 1);
 output_int(indices[pos], 1);
 output_char('\n', 1);
 sl_setp(count, pos + 1);
}
Example #14
0
sl_enddef

// One pass of the outer loop of kernel 2.  Reads the shared span (ii) and
// base offset (ipntp), passes half the span on, runs the innerk2 family
// over the current span and finally passes base + span on as the new offset.
sl_def(outerk2, void,
       sl_glparm(double*restrict, X),
       sl_glparm(const double*restrict, V),
       sl_shparm(unsigned long, ii),
       sl_shparm(unsigned long, ipntp))
{
  sl_index(m);

  const unsigned long span = sl_getp(ii);
  const unsigned long base = sl_getp(ipntp);
  const unsigned long next_base = span + base;

  // the halved span is passed on before the inner family is created
  sl_setp(ii, span / 2);

  sl_create(,,1,span,2,,, innerk2,
            sl_glarg(double*restrict, , sl_getp(X)),
            sl_glarg(const double*restrict, , sl_getp(V)),
            sl_glarg(unsigned long, , base),
            sl_glarg(unsigned long, , next_base));
  sl_sync();

  // the new offset is passed on only after the inner family has completed
  sl_setp(ipntp, next_base);
}
Example #15
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 7
//      equation of state
//          fragment
//---------------------------------

//---------------------------------
// for ( k=0 ; k<n ; k++ )
// {
//   x[k] = u[k] + r*( z[k] + r*y[k] ) +
//    t*( u[k+3] + r*( u[k+2] + r*u[k+1] ) +
//    t*( u[k+6] + q*( u[k+5] + q*u[k+4] ) ) );
//  }
//---------------------------------

//independent loop
sl_def(innerk7, void,
       sl_glparm(double*restrict, X),
       sl_glparm(const double*restrict, U),
       sl_glparm(const double*restrict, Z),
       sl_glparm(const double*restrict, Y),
       sl_glfparm(double, R),
       sl_glfparm(double, T),
       sl_glfparm(double, Q))
{
    sl_index(k);

    // local views of the arrays and scalar coefficients
    double*restrict x = sl_getp(X);
    const double*restrict u = sl_getp(U);
    const double*restrict z = sl_getp(Z);
    const double*restrict y = sl_getp(Y);
    const double r = sl_getp(R);
    const double t = sl_getp(T);
    const double q = sl_getp(Q);

    // element k of the kernel 7 expression, kept as one expression so the
    // order of the floating-point operations is unchanged
    x[k] = u[k] + r * (z[k] + r * y[k]) +
        t * (u[k+3] + r * (u[k+2] + r * u[k+1]) +
        t * (u[k+6] + q * (u[k+5] + q * u[k+4])));
}
Example #16
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 1
//        Hydro Fragment
//---------------------------------

//---------------------------------
// 'Original' C:
//        for ( k=0 ; k<n ; k++ ) {
//            x[k] = q + y[k]*( r*z[k+10] + t*z[k+11] );
//        }
//---------------------------------

//Break down kernel into two families
//this one does the 'meat'
sl_def(innerk1, void,
       sl_glparm(double*restrict, X),
       sl_glfparm(double, Q),
       sl_glparm(const double*restrict, Y),
       sl_glfparm(double, R),
       sl_glparm(const double*restrict, ZX),
       sl_glfparm(double, T) )
{
    sl_index(i);

    // local views of the arrays and scalar coefficients
    double*restrict x = sl_getp(X);
    const double*restrict y = sl_getp(Y);
    const double*restrict zx = sl_getp(ZX);
    const double q = sl_getp(Q);
    const double r = sl_getp(R);
    const double t = sl_getp(T);

    // element i of the hydro fragment
    x[i] = q + y[i] * (r * zx[i+10] + t * zx[i+11]);
}
// Fills one element of data: an odd position cnt receives cnt itself,
// an even position receives length - cnt.
sl_def(computeDataT, void, sl_glparm(int*, data), sl_glparm(int, length))
{
  sl_index(cnt);

  int *out = sl_getp(data);
  if (cnt % 2)
    out[cnt] = cnt;
  else
    out[cnt] = sl_getp(length) - cnt;
}
Example #18
0
// Thread function without parameters: it only declares its index i and
// performs no other work.
sl_def(foo, void)
{
  sl_index(i);
}