示例#1
0
// One round of a SHA-1 style compression loop, executed by thread i of a
// family.  w is the word schedule (only w[i] is read here); a..e carry the
// five working words from one thread to the next.
sl_def(sha_main_inner, void,
       sl_glparm(const uint32_t*restrict, w),
       sl_shparm(unsigned long, a),
       sl_shparm(unsigned long, b),
       sl_shparm(unsigned long, c),
       sl_shparm(unsigned long, d),
       sl_shparm(unsigned long, e))
{
  sl_index(i);

  // Rotate the state one position: e <- d, d <- c, c <- rol(b, 30), b <- a.
  // Every shared is written back right after the value it depends on has
  // been read, in the order e, d, c, b.
  uint32_t in_d = sl_getp(d);
  uint32_t in_e = sl_getp(e);
  sl_setp(e, in_d);
  uint32_t in_c = sl_getp(c);
  sl_setp(d, in_c);
  uint32_t in_b = sl_getp(b);
  sl_setp(c, ROL32(in_b, 30));
  uint32_t in_a = sl_getp(a);
  sl_setp(b, in_a);

  // Pick the mixing function and the additive constant for this index.
  uint32_t mix;
  uint32_t konst;
  if (i < 20) {
    mix = (in_b & in_c) | ((~in_b) & in_d);
    konst = 0x5A827999L;
  } else if (i < 40) {
    mix = (in_b ^ in_c ^ in_d);
    konst = 0x6ED9EBA1L;
  } else if (i < 60) {
    mix = (in_b & in_c) | (in_b & in_d) | (in_c & in_d);
    konst = 0x8F1BBCDCL;
  } else {
    mix = (in_b ^ in_c ^ in_d);
    konst = 0xCA62C1D6L;
  }

  // New a = rol(a, 5) + e + w[i] + mix + konst, accumulated in 32 bits.
  uint32_t next_a = ROL32(in_a, 5) + in_e + sl_getp(w)[i];
  next_a += mix;
  next_a += konst;
  sl_setp(a, next_a);
}
sl_enddef

// Partitions an interval using data[0] as the pivot.
// Thread i stores data[i] at the low end of result when it is smaller than
// the pivot, and at the high end otherwise.
// After all threads run, the value of "lower" reported to the parent
// is the position where the pivot should stay.
sl_def(do_partition_interval, void,
       sl_glparm(INT*, data),
       sl_glparm(INT*, result),
       sl_shparm(SIZE, lower),
       sl_shparm(SIZE, greater)) {
  sl_index(i);

  INT* src = sl_getp(data);
  const INT elem = src[i];

  if (elem < src[0]) {
    // "greater" does not change on this path: forward it first.
    sl_setp(greater, sl_getp(greater));

    // Claim the next free slot at the low end.
    SIZE slot = sl_getp(lower);
    sl_setp(lower, slot + 1);
    sl_getp(result)[slot] = elem;
  } else {
    // "lower" does not change on this path: forward it first.
    sl_setp(lower, sl_getp(lower));

    // Claim the next free slot at the high end.
    SIZE slot = sl_getp(greater);
    sl_setp(greater, slot - 1);
    sl_getp(result)[slot] = elem;
  }
}
示例#3
0
文件: shfloat.c 项目: jianfu/slcore
// Passes a floating-point shared along the family: thread 0 forwards the
// incoming value unchanged, every other thread forwards it plus one.
sl_def(foo, void, sl_shfparm(double, sarg))
{
  sl_index(i);
  double incoming = sl_getp(sarg);
  double outgoing = (i == 0) ? incoming : incoming + 1;
  sl_setp(sarg, outgoing);
}
示例#4
0
// Computes one Fibonacci term: adds the two values received through the
// shareds, shifts them along for the next thread and records the new term
// in fibo[i].
sl_def(fibo_compute, void,
       sl_shparm(INT, prev), sl_shparm(INT, prev2), sl_glparm(INT*, fibo))
{
  sl_index(i);

  INT last = sl_getp(prev);
  INT sum = last + sl_getp(prev2);

  sl_setp(prev2, last);
  sl_setp(prev, sum);
  sl_getp(fibo)[i] = sum;
}
示例#5
0
// Records the index of each running thread in indices[] until "max" entries
// have been stored; once the limit is reached the counter is forwarded
// unchanged and sl_break is issued.
sl_def(icount, void,
      sl_shparm(unsigned, count),
      sl_glparm(unsigned, max))
{
 sl_index(i);
 const unsigned seen = sl_getp(count);
 if (seen >= sl_getp(max)) {
     sl_setp(count, seen);
     sl_break ;
 }
 // Store this thread's index in the next slot and bump the counter.
 indices[seen] = i;
 sl_setp(count, seen + 1);
}
// Partitions interval i of the current level around its first element.
// data/result: source and destination arrays for this level
// level: selects which of the two interval lists (intervals[level%2]) is read;
//        the new sub-intervals are appended to the other list
// done: used by a thread to signal its right sibling when it finished
sl_def(partition_interval, void,
       sl_glparm(INT*, data),
       sl_glparm(INT*, result),
       sl_glparm(SIZE, level),
       sl_shparm(int, done)
       ) {
  sl_index(i);

  INT* data = sl_getp(data);
  INT* result = sl_getp(result);
  SIZE level = sl_getp(level);

  // bounds of the interval handled by this thread
  int l = intervals[level%2][i].l;
  int r = intervals[level%2][i].r;

  if (l == r) {
    // this interval is sorted (1 element), so don't copy it to the
    // next level
    result[l] = data[l];
    // nothing to append: just pass the token on to the right sibling
    sl_setp(done, sl_getp(done));
  } else {
    // Run do_partition_interval with start index 1 and limit r - l + 1
    // (index 0 is the pivot), on the sub-arrays starting at offset l.
    // "lower" starts at 0 and "greater" at r - l.
    sl_create(,,1,r - l + 1,,,,
	      do_partition_interval,
	      sl_glarg(INT*, gdata, data + l),
	      sl_glarg(INT*, gres, result + l),
	      sl_sharg(SIZE, lower, 0),
	      sl_sharg(SIZE, greater, r - l));
    sl_sync();
    // final "lower" is relative to the interval; make it an absolute index
    SIZE la = sl_geta(lower);
    la = la + l;
    result[la] = data[l];  // put the pivot in the right place
    // copy the 2 new intervals to next level
    // but after the left sibling has done doing the same
    int left_done = sl_getp(done);
    workaround += left_done;  // use the value, so the read doesn't get
                       // optimized away. See comment for workaround.

    // NOTE(review): the left interval is stored as [l, la], i.e. it still
    // contains the pivot slot la -- confirm this is intended.
    if (l < la) {  // don't copy an interval of len=1
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = l;
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = la;
      num_intervals[(level+1) % 2]++;
    }
    if (la + 1 < r) {  // don't copy an interval of len=1
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = la + 1;
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = r;
      num_intervals[(level+1) % 2]++;
    }

    //signal to the right sibling that I'm done
    sl_setp(done, 0);
  }
}
sl_enddef

/* Further partitions a list of intervals
 * level: level of the current list to be partitioned
 */
sl_def(partition_list_of_intervals, void,
       sl_glparm(INT*, data),
       sl_glparm(SIZE, len),
       sl_glparm(INT*, scratch),
       sl_shparm(INT, level),
       sl_shparm(INT, done)
      ) {
  sl_index(i);

  INT done = sl_getp(done);


  if (!done) {
    INT* d = sl_getp(data);
    SIZE l = sl_getp(len);
    INT level = sl_getp(level);
    INT* scratch = sl_getp(scratch);
    //printf("PARTIITON LIST: level = %d\n", level);

    num_intervals[(level+1)%2] = 0;

    // partition the intervals
    sl_create(,,0,num_intervals[level%2],,,,partition_interval,
        sl_glarg(INT*, gdata, d),
        sl_glarg(INT*, gres, scratch),
        sl_glarg(SIZE, level, level),
        sl_sharg(int, done, 1));
    sl_sync();

    // copy partitoned values back to d
    sl_create(,,0,l,,,,
        copy_array,
        sl_glarg(INT*, gdestination, d),
        sl_glarg(INT*, gsource, scratch));
    sl_sync();
    int j = 0;

    if (num_intervals[(level+1)%2] == 0) {
      // no intervals for the next level => we're done
      sl_setp(level, 0);  // value doesn't matter, just unblock sibling
      sl_setp(done, 1);
    } else {
      // trigger the next sibling to start
      sl_setp(level, level+1);
      sl_setp(done, 0);
    }
  } else {  // if (!done)
示例#8
0
// Adds foo(scratch) - i to the shared x, where scratch is a large
// thread-local array with only element 42 initialised.
sl_def(bar, void, sl_shparm(int, x))
{
    sl_index(i);

    int scratch[90000];
    scratch[42] = 123;

    sl_setp(x, sl_getp(x) + foo(scratch) - i);
}
示例#9
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 3
//        Inner Product
//---------------------------------

//---------------------------------
//         q = 0.0;
//         for ( k=0 ; k<n ; k++ )
//         {
//             q += z[k]*x[k];
// 	   }
//---------------------------------

#ifndef NAIVE
#define REDUCTIONS
#endif

// Inner-product step: adds Z[i] * X[i] to the running sum carried in Q.
sl_def(innerk3, void,
       sl_shfparm(double, Q),
       sl_glparm(const double*restrict, Z),
       sl_glparm(const double*restrict, X))
{
    sl_index(i);

    const double*restrict zs = sl_getp(Z);
    const double*restrict xs = sl_getp(X);

    // product and accumulation stay in a single expression, as in the
    // original formulation
    sl_setp(Q, (zs[i] * xs[i]) + sl_getp(Q));
}
示例#10
0
[[]]
//---------------------------------
// Livermore Loops -- SLC (uTC)
// M.A.Hicks, CSA Group, UvA
// Implementation based on various
// reference implementations
// including the original FORTRAN
// but mostly from
// Roy Longbottom, 1996.
//---------------------------------
//      LIVERMORE KERNEL 5
//        tri-diagonal
//   elimination, below diagonal
//---------------------------------

//---------------------------------
// for ( i=1 ; i<n ; i++ )
// {
//  x[i] = z[i]*( y[i] - x[i-1] );
// }
//---------------------------------

// One step of the recurrence x[i] = z[i] * (y[i] - x[i-1]); the previous
// x value arrives through the shared prevx and the new one is passed on.
sl_def(innerk5,void,
       sl_shfparm(double, prevx),
       sl_glparm(double*restrict, X),
       sl_glparm(const double*restrict, Y),
       sl_glparm(const double*restrict, Z))
{
    sl_index(i);

    const double zi = sl_getp(Z)[i];
    const double yi = sl_getp(Y)[i];
    const double xi = zi * (yi - sl_getp(prevx));

    // forward the new value before storing it to memory
    sl_setp(prevx, xi);
    sl_getp(X)[i] = xi;
}
示例#11
0
sl_enddef

#ifdef REDUCTIONS

// method to perform a graph reduction of the above dependent kernel over CORES
// Reduction step over one chunk of the inner product: thread "redindex"
// runs innerk3 over the index range [iternum * redindex, + iternum) with a
// private accumulator, then adds that partial sum to the shared Q.
sl_def(reductionk3, void,
       sl_shfparm(double, Q),
       sl_glparm(const double*restrict, Z),
       sl_glparm(const double*restrict, X),
       sl_glparm(long, iternum))
{
    sl_index(redindex);

    const long chunk = sl_getp(iternum);
    const long first = chunk * redindex;
    const long limit = first + chunk;

    sl_create(,PLACE_LOCAL, first, limit, 1,,, innerk3,
              sl_shfarg(double, Qr, 0.0),
              sl_glarg(const double*, , sl_getp(Z)),
              sl_glarg(const double*, , sl_getp(X)));
    sl_sync();

    // now accumulate the results
    sl_setp(Q, sl_geta(Qr) + sl_getp(Q) );
}
示例#12
0
// Adds this thread's index to the running sum carried in the shared _s.
sl_def (thread, void, sl_shparm(int, _s))
{
	sl_index(x);
	int total = sl_getp(_s);
	total = x + total;
	sl_setp(_s, total);
}
示例#13
0
// Prints the value received through the shared s, then passes s + 1 on.
sl_def(synch_thread, void, sl_shparm(int, s))
{
  const int ticket = sl_getp(s);

  output_int(ticket, 1);
  sl_setp(s, ticket + 1);
}
示例#14
0
// Reads the token, calls bar(""), prints the thread index followed by a
// newline, and finally forwards the token unchanged.
sl_def(foo, void, sl_shparm(int, token))
{
    int held = sl_getp(token);
    (void)bar("");
    sl_index(i);
    output_int(i, 1);
    output_char('\n', 1);
    sl_setp(token, held);
}
示例#15
0
sl_enddef

// Processes one 16-word block of input, located at input + offset_base:
// builds the 80-word schedule w, runs 80 sha_main_inner threads over it
// seeded with the incoming h0..h4, and adds the resulting a..e back into
// h0..h4 for the next thread.
sl_def(sha_main_outer, void,
       sl_glparm(const uint32_t*restrict, input),
       sl_shparm(unsigned long, h0),
       sl_shparm(unsigned long, h1),
       sl_shparm(unsigned long, h2),
       sl_shparm(unsigned long, h3),
       sl_shparm(unsigned long, h4))
{
  sl_index(offset_base);
  int i;
  // the family index is used directly as a word offset into the input
  const uint32_t*restrict input = sl_getp(input) + offset_base;

  /* word extension: not easily made concurrent! */
  uint32_t w[80];
  // fill w[0..15] from the input block using a buf_copy family
  sl_create(,PLACE_LOCAL,,16,,,, buf_copy,
	    sl_glarg(const uint32_t*restrict, src, input),
	    sl_glarg(uint32_t*restrict, dst, w));
  sl_sync();
  //  for (i = 0; i < 16; ++i) w[i] = input[i];
  // each remaining word depends on four earlier ones, hence the plain loop
  for (i = 16; i < 80; ++i) {
    uint32_t x = w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16];
    w[i] = ROL32(x, 1);
  }

  // The shared arguments are declared without initial values here and are
  // filled in from h0..h4 with sl_seta below, before the sync.
  sl_create(,,,80,,,, sha_main_inner,
	    sl_glarg(const uint32_t*restrict, wg, w),
	    sl_sharg(unsigned long, a),
	    sl_sharg(unsigned long, b),
	    sl_sharg(unsigned long, c),
	    sl_sharg(unsigned long, d),
	    sl_sharg(unsigned long, e));
  sl_seta(a, sl_getp(h0));
  sl_seta(b, sl_getp(h1));
  sl_seta(c, sl_getp(h2));
  sl_seta(d, sl_getp(h3));
  sl_seta(e, sl_getp(h4));
  sl_sync();
  // add the working words produced by the inner family to the incoming state
  sl_setp(h0, sl_getp(h0) + sl_geta(a));
  sl_setp(h1, sl_getp(h1) + sl_geta(b));
  sl_setp(h2, sl_getp(h2) + sl_geta(c));
  sl_setp(h3, sl_getp(h3) + sl_geta(d));
  sl_setp(h4, sl_getp(h4) + sl_geta(e));
}
示例#16
0
// Prints one character. The family index is reinterpreted as the address of
// the character; the token is read before printing and forwarded afterwards.
sl_def(do_print, void, sl_shparm(long, tok))
{
  sl_index(i);
  char ch = *(const char*)(void*)(long)i;
  long token = sl_getp(tok);
  output_char(ch, 1);
  sl_setp(tok, token);
}
示例#17
0
sl_enddef

// Prints "<count> <indices[count]>" on one line and increments the shared
// counter; once the counter reaches refcount it is forwarded unchanged and
// sl_break is issued.
// NOTE(review): the definition is declared with type int while sibling
// definitions use void -- confirm this is intentional.
sl_def(iprint, int,
      sl_shparm(unsigned, count),
      sl_glparm(unsigned, refcount))
{
 sl_index(i);
 const unsigned pos = sl_getp(count);
 const unsigned limit = sl_getp(refcount);
 if (pos >= limit)
 {
     sl_setp(count, pos);
     sl_break ;
 }
 output_int(pos, 1);
 output_char(' ', 1);
 output_int(indices[pos], 1);
 output_char('\n', 1);
 sl_setp(count, pos + 1);
}
示例#18
0
sl_enddef

// Outer step: reads the current ii and ipntp, halves ii for the next thread,
// runs an innerk2 family (start 1, limit ii, step 2) with the old and new
// ipntp values, and finally forwards the new ipntp (= ii + old ipntp).
sl_def(outerk2, void,
       sl_glparm(double*restrict, X),
       sl_glparm(const double*restrict, V),
       sl_shparm(unsigned long, ii),
       sl_shparm(unsigned long, ipntp))
{
  sl_index(m);

  const unsigned long span = sl_getp(ii);
  const unsigned long base = sl_getp(ipntp);
  const unsigned long next_base = span + base;

  // ii is forwarded before the inner family is created
  sl_setp(ii, span/2);

  sl_create(,,1,span,2,,, innerk2,
            sl_glarg(double*restrict, , sl_getp(X)),
            sl_glarg(const double*restrict, , sl_getp(V)),
            sl_glarg(unsigned long, , base),
            sl_glarg(unsigned long, , next_base));
  sl_sync();

  // ipntp is only forwarded once the inner family has completed
  sl_setp(ipntp, next_base);
}
示例#19
0
sl_enddef

// Prints term i of the table together with the two terms before it.
// The guard shared is read before printing and forwarded unchanged after.
sl_def(fibo_print, void,
       sl_shparm(INT, guard), sl_glparm(INT*, fibo))
{
  sl_index(i);

  INT* series = sl_getp(fibo);
  INT older = series[i - 2];
  INT newer = series[i - 1];
  INT current = series[i];

  INT token = sl_getp(guard);
  printf("The %luth Fibonacci number is %lu + %lu = %lu\n", (INT)i, older, newer, current);
  sl_setp(guard, token);
}
示例#20
0
文件: gol.c 项目: svp-dev/sl-programs
sl_enddef


/**
GOL
**/
sl_def(gol,void,sl_shparm(int,breaked))
{

	sl_index(index);
	int flag = sl_getp(breaked);
		
	if(flag==0)
	{	
		//info_print("GOL Thread %d :Processing %d blocks\n", index, b_queue->elements);
		//printf("GOL Thread %d :Processing %d blocks\n", index, b_queue->elements);
		//info_print("%d\n",iter);
		if( index + 1 == cycle)
			flag = 1;
		debug_print("Creating Worker family...\n");
		sl_create(,,,,0,block_size,,run,sl_glarg(int,iteration,index),sl_sharg(int,sta,0));
		debug_print("Waiting for sync...\n");
		sl_sync();
        
		debug_print("Workers finished, processing request queue...\n");
		sl_create(,,,,0,block_size,,process_requests,sl_sharg(int,stat,0));
		debug_print("Waiting for sync...\n");
		sl_sync();
  		
		debug_print("Request queue processing finished, traversing...\n");
		
		b_queue->elements = 0;
	
		sl_create(,,,,0,block_size,,traverse,sl_sharg(int,state,1),sl_sharg(struct hashtable_itr*,itr,hashtable_iterator(table)));
		debug_print("Waiting for sync...\n");
		sl_sync();			
		
		
		sl_setp(breaked,flag);
		
		if(flag == 1)
			sl_break;				
	}	
	else
	{
示例#21
0
// Increments the shared a by one and passes it on.
sl_def(foo, void, sl_shparm(int, a))
{
  int incoming = sl_getp(a);
  sl_setp(a, incoming + 1);
}
示例#22
0
// Increments the shared x by one and passes it on.
sl_def(bar, void, sl_shparm(int, x))
{
    int incoming = sl_getp(x);
    sl_setp(x, incoming + 1);
}
示例#23
0
文件: crdefaults.c 项目: knz/slcore
// Stores the incoming x in the variable bla (declared outside this
// definition) and adds it to the shared v.
// NOTE(review): x is read but never written back here -- confirm that this
// is what the caller expects.
sl_def(a, void, sl_shparm(int, x), sl_shparm(int, v))
{
    bla = sl_getp(x);

    int acc = sl_getp(v);
    sl_setp(v, acc + bla);
}
示例#24
0
// Increments the shared a, then prints a dot.
sl_def(foo, void, sl_shparm(int, a))
{
   int incoming = sl_getp(a);
   sl_setp(a, incoming + 1);

   // the output happens after the shared has already been forwarded
   putchar('.');
}
示例#25
0
// Adds 1.0 to the floating-point shared a and passes it on.
sl_def(foo, void, sl_shfparm(double, a))
{
  double incoming = sl_getp(a);
  sl_setp(a, incoming + 1.0);
}
示例#26
0
// Forwards the shared x unchanged.
sl_def(foo, void, sl_shparm(int, x))
{
  int incoming = sl_getp(x);
  sl_setp(x, incoming);
}