예제 #1
0
sl_enddef

// 2009-04-02: FIXME: we acknowledge that muTC-ptl
// does not support this construct fully yet; but
// we want slc's testsuite to properly succeed. So
// we mark the test to ignore the output on muTC-ptl:
// XIGNORE: ptl*:D

// Test entry point: creates foo with a shared int argument that is given
// no value at create time, spins in a delay loop, and only then supplies
// the value 42. After the sync, the value read back from the shared
// argument is printed, followed by a newline.
// foo is defined outside this excerpt.
sl_def(t_main, void)
{
  int busy;  // counter for the delay loop below
  // The shared argument a receives no value here; it is set further down.
  sl_create(,,,,,,,
	    foo, sl_sharg(int, a));
  // FIXME: for ptl here we should find a way to
  // force a context switch, otherwise the problem
  // is not demonstrated. In the previous version
  // of this test, a call was done to the C library's
  // putc() function, but we can't do this in SL
  // because putc is a thread function and we cannot
  // nest creates.
  for (busy = 0; busy < 10000; ++busy) nop();
  // Late initialization of the shared argument, after the create.
  sl_seta(a, 42);
  sl_sync();

  // Print the shared argument as read back after the sync.
  output_int(sl_geta(a), 1);
  output_char('\n', 1);
}
예제 #2
0
sl_enddef

#ifdef REDUCTIONS

// Reduction step over the dependent kernel: each thread of this family
// runs innerk3 (defined elsewhere) over its own slice of iternum
// consecutive indices and adds that slice's result to the running total
// carried in the shared Q.
sl_def(reductionk3, void,
       sl_shfparm(double, Q),
       sl_glparm(const double*restrict, Z),
       sl_glparm(const double*restrict, X),
       sl_glparm(long, iternum))
{
    sl_index(slice);

    // Index range handled by this thread: iternum indices starting at
    // iternum * slice.
    long span = sl_getp(iternum);
    long first = span * slice;
    long last = first + span;

    // The partial sum Qr starts at 0.0 for every slice.
    sl_create(,PLACE_LOCAL, first, last, 1,,, innerk3,
              sl_shfarg(double, Qr, 0.0),
              sl_glarg(const double*, , sl_getp(Z)),
              sl_glarg(const double*, , sl_getp(X)));
    sl_sync();

    // Accumulate: incoming total plus this slice's partial sum.
    sl_setp(Q, sl_geta(Qr) + sl_getp(Q) );
}
예제 #3
0
sl_enddef

// Thread body handling one chunk of the message: copies 16 input words
// starting at this thread's index, expands them into an 80-word schedule,
// runs sha_main_inner over the schedule seeded with the incoming h0..h4,
// and adds the five results back into h0..h4.
sl_def(sha_main_outer, void,
       sl_glparm(const uint32_t*restrict, input),
       sl_shparm(unsigned long, h0),
       sl_shparm(unsigned long, h1),
       sl_shparm(unsigned long, h2),
       sl_shparm(unsigned long, h3),
       sl_shparm(unsigned long, h4))
{
  sl_index(offset_base);
  const uint32_t*restrict chunk = sl_getp(input) + offset_base;
  uint32_t w[80];
  int t;

  // Fill the first 16 schedule words through a buf_copy family; this
  // stands in for a plain loop copying chunk[0..15] into w[0..15].
  sl_create(,PLACE_LOCAL,,16,,,, buf_copy,
	    sl_glarg(const uint32_t*restrict, src, chunk),
	    sl_glarg(uint32_t*restrict, dst, w));
  sl_sync();

  /* word extension: not easily made concurrent! */
  for (t = 16; t < 80; ++t) {
    uint32_t mixed = w[t-3] ^ w[t-8] ^ w[t-14] ^ w[t-16];
    w[t] = ROL32(mixed, 1);
  }

  // Run the inner family over the 80 schedule words. The five working
  // values are seeded from the incoming hash state after the create.
  sl_create(,,,80,,,, sha_main_inner,
	    sl_glarg(const uint32_t*restrict, wg, w),
	    sl_sharg(unsigned long, a),
	    sl_sharg(unsigned long, b),
	    sl_sharg(unsigned long, c),
	    sl_sharg(unsigned long, d),
	    sl_sharg(unsigned long, e));
  sl_seta(a, sl_getp(h0));
  sl_seta(b, sl_getp(h1));
  sl_seta(c, sl_getp(h2));
  sl_seta(d, sl_getp(h3));
  sl_seta(e, sl_getp(h4));
  sl_sync();

  // Add the inner family's results to the hash state passed along to the
  // next thread.
  sl_setp(h0, sl_getp(h0) + sl_geta(a));
  sl_setp(h1, sl_getp(h1) + sl_geta(b));
  sl_setp(h2, sl_getp(h2) + sl_geta(c));
  sl_setp(h3, sl_getp(h3) + sl_geta(d));
  sl_setp(h4, sl_getp(h4) + sl_geta(e));
}
// done: used by a thread to signal its right sibling when it has finished
//
// Thread body: partitions the interval assigned to thread i at the given
// level. The interval bounds come from intervals[level%2][i]. A one-element
// interval is copied straight to result. Otherwise do_partition_interval
// (defined elsewhere) is run over the interval, the pivot data[l] is stored
// at its final position, and up to two sub-intervals are appended to the
// table for the next level, intervals[(level+1)%2].
sl_def(partition_interval, void,
       sl_glparm(INT*, data),
       sl_glparm(INT*, result),
       sl_glparm(SIZE, level),
       sl_shparm(int, done)
       ) {
  sl_index(i);

  // Local copies of the global parameters.
  INT* data = sl_getp(data);
  INT* result = sl_getp(result);
  SIZE level = sl_getp(level);

  // Bounds of the interval handled by this thread (both used as indices).
  int l = intervals[level%2][i].l;
  int r = intervals[level%2][i].r;

  if (l == r) {
    // this interval is sorted (1 element), so don't copy it to the
    // next level
    result[l] = data[l];
    // Pass the token along unchanged: read from the left sibling, write
    // to the right sibling.
    sl_setp(done, sl_getp(done));
  } else {
    // Partition family over indices starting at 1, working on the
    // sub-arrays that begin at offset l. The shared lower starts at 0
    // and the shared greater starts at r - l.
    sl_create(,,1,r - l + 1,,,,
	      do_partition_interval,
	      sl_glarg(INT*, gdata, data + l),
	      sl_glarg(INT*, gres, result + l),
	      sl_sharg(SIZE, lower, 0),
	      sl_sharg(SIZE, greater, r - l));
    sl_sync();
    // Final value of lower, shifted by l, is the pivot's index in result.
    SIZE la = sl_geta(lower);
    la = la + l;
    result[la] = data[l];  // put the pivot in the right place
    // copy the 2 new intervals to next level
    // but only after the left sibling has finished doing the same
    int left_done = sl_getp(done);
    workaround += left_done;  // use the value, so the read doesn't get
                       // optimized away. See comment for workaround.

    // Append to the next level's table; num_intervals[(level+1) % 2] is
    // the index of the next free slot.
    if (l < la) {  // don't copy an interval of len=1
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = l;
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = la;
      num_intervals[(level+1) % 2]++;
    }
    if (la + 1 < r) {  // don't copy an interval of len=1
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = la + 1;
      intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = r;
      num_intervals[(level+1) % 2]++;
    }

    //signal to the right sibling that I'm done
    sl_setp(done, 0);
  }
}
예제 #5
0
// Runs two families back to back. Returns 0 when the first family ended
// with a break and left its counter at 20; returns 1 otherwise.
// icount and iprint are defined outside this excerpt.
int test(void)
{
 int exit_code;

 // First family: icount, with the shared counter starting at 0 and the
 // global max set to 20.
 sl_create(,, -10,200,10, 0, ,
           icount,
           sl_sharg(unsigned, count, 0),
           sl_glarg(unsigned, max, 20));
 sl_sync(exit_code);

 // A break must be reported, and the counter must have reached 20. The
 // counter is only inspected when the exit code matched.
 if (exit_code != SVP_EXIT_BREAK || sl_geta(count) != 20) {
     return 1;
 }

 // Second family: iprint, bounded by the counter value from the first
 // family, which is also passed in as the global refc.
 sl_create(,,, sl_geta(count),,,,
           iprint,
           sl_sharg(unsigned, c, 0),
           sl_glarg(unsigned, refc, sl_geta(count)));
 sl_sync();

 return 0;
}
예제 #6
0
파일: useafter4.c 프로젝트: fuzzie/slcore
//
// useafter4.c: this file is part of the SL toolchain.
//
// Copyright (C) 2009 The SL project.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//

// Empty thread function taking one shared int (a) and one global int (b).
sl_def(foo, void,
       sl_shparm(int, a),
       sl_glparm(int, b))
{
}
sl_enddef

// Creates foo with a shared argument x (set to 20 after the create) and a
// global argument y (10), then reads both arguments back after the sync.
sl_def(t_main, void)
{
  sl_create(,,,,,,, foo,
            sl_sharg(int, x),
            sl_glarg(int, y, 10));
  sl_seta(x, 20);
  sl_sync();

  // Both arguments are used after the sync; the sum itself is not used.
  int z = sl_geta(x) + sl_geta(y);
}
sl_enddef
예제 #7
0
READ_COUNTER(n)
READ_ARRAY_IN(sx, n)
READ_COUNTER(incx)
END_READ

// Preparation section: intentionally empty for this benchmark.
BEGIN_PREPARE
END_PREPARE

// Work section: runs FUNCTION once with a shared FLOAT result that starts
// at 0 and three globals taken from the variables n, sx and incx. After
// the sync, the shared result is stored in the variable nrm.
BEGIN_WORK
     sl_create(,,,,,,,FUNCTION,
	       sl_shfarg(FLOAT, ret, 0.),
		 sl_glarg(long, , USE_VAR(n)),
		 sl_glarg(const FLOAT*, , USE_VAR(sx)),
	       sl_glarg(long, , USE_VAR(incx)));
sl_sync();
USE_VAR(nrm) = sl_geta(ret);
END_WORK

// Output section: prints the scalar nrm computed by the work section.
BEGIN_OUTPUT
PRINT_SCALAR(nrm)
END_OUTPUT

// Teardown section: releases the input array sx.
BEGIN_TEARDOWN
FREE_ARRAY_IN(sx)
END_TEARDOWN

// Benchmark metadata. The only array input of this benchmark is sx, so
// the description states the single-vector Euclidean norm rather than a
// two-vector formula.
BEGIN_DESC
BENCH_TITLE("BLAS: _NRM2")
BENCH_AUTHOR("kena")
BENCH_DESC("Compute SQRT(SUM(X[i]^2))")
END_DESC
예제 #8
0
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//

#include <svp/compiler.h>
#include <svp/testoutput.h>

// Returns element 42 of the array a. The function is marked noinline and
// calls barrier() before the load; both come from svp/compiler.h.
noinline
int foo(int*a)
{
    barrier();
    int picked = a[42];
    return picked;
}

// XIGNORE: ptl*:R

// Thread body: declares a 90000-element local array, stores 123 in slot
// 42, reads it back through foo, and updates the shared x to
// x + (value read) - (thread index).
sl_def(bar, void, sl_shparm(int, x))
{
    sl_index(idx);
    int scratch[90000];

    scratch[42] = 123;
    sl_setp(x, sl_getp(x) + foo(scratch) - idx);
}
sl_enddef

// Test entry point: creates bar over the index range 23 .. 23+1 with the
// shared x starting at -58, waits for it, then prints the final value of
// x followed by a newline.
sl_def(t_main, void)
{
    sl_create(,,23,23+1,,,, bar, sl_sharg(int, x, -58));
    sl_sync();
    // Print the shared argument as read back after the sync.
    output_int(sl_geta(x), 1);
    output_char('\n', 1);
}
sl_enddef
예제 #9
0
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//

#include <stdio.h>

// Thread body: increments the shared counter a by one, then prints a dot.
sl_def(foo, void, sl_shparm(int, a))
{
   int bumped = sl_getp(a) + 1;
   sl_setp(a, bumped);

   putchar('.');
}
sl_enddef

// 2009-04-02: FIXME: we acknowledge that muTC-ptl does not support
// setting the shared after create fully yet; but we want slc's
// testsuite to properly succeed. So we mark the test to ignore the
// output on muTC-ptl:
// XIGNORE: ptl*:D

// Test entry point: creates foo with index arguments 0, 10, 1 and a
// shared counter x that is set to 0 only after the create. After the
// sync, the final counter value is printed on its own line.
sl_def(t_main, void)
{
  sl_create(,, 0, 10, 1, 0,,
            foo, sl_sharg(int, x));
  // The shared counter gets its starting value after the create.
  sl_seta(x, 0);
  sl_sync();

  int total = sl_geta(x);
  printf("\n%d\n", total);
}
sl_enddef
예제 #10
0
파일: shfloat.c 프로젝트: jianfu/slcore
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//

#include <svp/testoutput.h>

// Thread body: passes the shared double sarg along. The thread with index
// 0 forwards the value unchanged; every other thread forwards it
// incremented by one.
sl_def(foo, void, sl_shfparm(double, sarg))
{
  sl_index(idx);
  double carried = sl_getp(sarg);

  sl_setp(sarg, (idx == 0) ? carried : carried + 1);
}
sl_enddef

// Test entry point: creates foo with index arguments 0, 9, 1 and a shared
// double starting at 7.5, waits for it, then prints the final value of
// the shared double followed by a newline.
sl_def(t_main, void)
{
  sl_create(,,0,9,1,,, foo, sl_shfarg(double, s_in, 7.5));
  sl_sync();
  // Print the shared argument as read back after the sync.
  output_float(sl_geta(s_in), 1, 3);
  output_char('\n', 1);
}
sl_enddef
예제 #11
0
//
// useafter3.c: this file is part of the SL toolchain.
//
// Copyright (C) 2009 Universiteit van Amsterdam.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// The complete GNU General Public Licence Notice can be found as the
// `COPYING' file in the root directory.
//

// Empty thread function taking a single global int parameter a.
sl_def(foo, void,
       sl_glparm(int, a))
{
}
sl_enddef

// Creates foo with a global argument x set to 10, waits for it, and then
// reads the global argument back after the sync.
sl_def(t_main, void)
{
  sl_create(,,,,,,, foo, sl_glarg(int, x, 10));
  sl_sync();
  // The argument is used after the sync; z itself is not used further.
  int z = sl_geta(x);
}
sl_enddef
예제 #12
0
{
  sl_create(,,,sl_getp(len_words), 16,,, sha_main_outer,
	    sl_glarg(const uint32_t*restrict, wg, sl_getp(w)),
	    sl_sharg(unsigned long, h0),
	    sl_sharg(unsigned long, h1),
	    sl_sharg(unsigned long, h2),
	    sl_sharg(unsigned long, h3),
	    sl_sharg(unsigned long, h4));
  sl_seta(h0, 0x67452301L);
  sl_seta(h1, 0xEFCDAB89L);
  sl_seta(h2, 0x98BADCFEL);
  sl_seta(h3, 0x10325476L);
  sl_seta(h4, 0xC3D2E1F0L);
  sl_sync();
  uint32_t*restrict out = sl_getp(out);
  out[0] = sl_geta(h0);
  out[1] = sl_geta(h1);
  out[2] = sl_geta(h2);
  out[3] = sl_geta(h3);
  out[4] = sl_geta(h4);
}
sl_enddef

// Upper bound on the number of chunks the static input buffer can hold.
#define MAX_CHUNKS 10000

// Input buffer: 16 32-bit words (512 bits) per chunk, MAX_CHUNKS chunks.
uint32_t data[16*MAX_CHUNKS];
// Result buffer: five 32-bit words.
uint32_t output[5];

// Run-time input N: the problem size, expressed in 512-bit chunks.
slr_decl(slr_var(unsigned, N, "problem size (512-bit chunks)"));

// SLT_RUN: N=10