sl_enddef // 2009-04-02: FIXME: we acknowledge that muTC-ptl // does not support this construct fully yet; but // we want slc's testsuite to properly succeed. So // we mark the test to ignore the output on muTC-ptl: // XIGNORE: ptl*:D sl_def(t_main, void) { int busy; sl_create(,,,,,,, foo, sl_sharg(int, a)); // FIXME: for ptl here we should find a way to // force a context switch, otherwise the problem // is not demonstrated. In the previous version // of this test, a call was done to the C library's // putc() function, but we can't do this in SL // because putc is a thread function and we cannot // nest creates. for (busy = 0; busy < 10000; ++busy) nop(); sl_seta(a, 42); sl_sync(); output_int(sl_geta(a), 1); output_char('\n', 1); }
sl_enddef #ifdef REDUCTIONS // method to perform a graph reduction of the above dependent kernel over CORES sl_def(reductionk3, void, sl_shfparm(double, Q), sl_glparm(const double*restrict, Z), sl_glparm(const double*restrict, X), sl_glparm(long, iternum)) { sl_index(redindex); long lower = sl_getp(iternum) * redindex; long upper = lower + sl_getp(iternum); sl_create(,PLACE_LOCAL, lower, upper, 1,,, innerk3, sl_shfarg(double, Qr, 0.0), sl_glarg(const double*, , sl_getp(Z)), sl_glarg(const double*, , sl_getp(X))); sl_sync(); //now accumilate the results sl_setp(Q, sl_geta(Qr) + sl_getp(Q) ); }
sl_enddef

// sha_main_outer: processes the 16 input words that start at this
// thread's index.
//
//   input  - base of the word array; the thread's index is added to it
//   h0..h4 - shared hash state; read as the starting value and updated
//            with the result of this thread
//
// Steps: copy 16 words into w[0..15], extend them to the 80-word
// schedule w[16..79], run the sha_main_inner family over w with shared
// arguments a..e seeded from h0..h4, then add a..e back into h0..h4.
sl_def(sha_main_outer, void,
       sl_glparm(const uint32_t*restrict, input),
       sl_shparm(unsigned long, h0),
       sl_shparm(unsigned long, h1),
       sl_shparm(unsigned long, h2),
       sl_shparm(unsigned long, h3),
       sl_shparm(unsigned long, h4))
{
  sl_index(offset_base);
  int i;

  // Local view of the input, advanced to this thread's words. Note
  // that it reuses the name of the `input` parameter.
  const uint32_t*restrict input = sl_getp(input) + offset_base;

  /* word extension: not easily made concurrent! */
  uint32_t w[80];

  // Fill w[0..15] through a buf_copy family (limit 16) instead of the
  // sequential loop kept in the comment below.
  // NOTE(review): buf_copy is defined outside this view; presumably it
  // copies src[i] to dst[i] -- confirm.
  sl_create(,PLACE_LOCAL,,16,,,, buf_copy,
            sl_glarg(const uint32_t*restrict, src, input),
            sl_glarg(uint32_t*restrict, dst, w));
  sl_sync();
  // for (i = 0; i < 16; ++i) w[i] = input[i];

  // Each new word depends on earlier words of w, hence the plain loop.
  for (i = 16; i < 80; ++i)
    {
      uint32_t x = w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16];
      w[i] = ROL32(x, 1);
    }

  // Run sha_main_inner with limit 80 over the schedule; the shared
  // arguments a..e are seeded after the create and before the sync.
  sl_create(,,,80,,,, sha_main_inner,
            sl_glarg(const uint32_t*restrict, wg, w),
            sl_sharg(unsigned long, a),
            sl_sharg(unsigned long, b),
            sl_sharg(unsigned long, c),
            sl_sharg(unsigned long, d),
            sl_sharg(unsigned long, e));
  sl_seta(a, sl_getp(h0));
  sl_seta(b, sl_getp(h1));
  sl_seta(c, sl_getp(h2));
  sl_seta(d, sl_getp(h3));
  sl_seta(e, sl_getp(h4));
  sl_sync();

  // Fold the values left in a..e back into the shared hash state.
  sl_setp(h0, sl_getp(h0) + sl_geta(a));
  sl_setp(h1, sl_getp(h1) + sl_geta(b));
  sl_setp(h2, sl_getp(h2) + sl_geta(c));
  sl_setp(h3, sl_getp(h3) + sl_geta(d));
  sl_setp(h4, sl_getp(h4) + sl_geta(e));
}
// done: used by a thread to signal it's right sibling when it finished sl_def(partition_interval, void, sl_glparm(INT*, data), sl_glparm(INT*, result), sl_glparm(SIZE, level), sl_shparm(int, done) ) { sl_index(i); INT* data = sl_getp(data); INT* result = sl_getp(result); SIZE level = sl_getp(level); int l = intervals[level%2][i].l; int r = intervals[level%2][i].r; if (l == r) { // this interval is sorted (1 element), so don't copy it to the // next level result[l] = data[l]; sl_setp(done, sl_getp(done)); } else { sl_create(,,1,r - l + 1,,,, do_partition_interval, sl_glarg(INT*, gdata, data + l), sl_glarg(INT*, gres, result + l), sl_sharg(SIZE, lower, 0), sl_sharg(SIZE, greater, r - l)); sl_sync(); SIZE la = sl_geta(lower); la = la + l; result[la] = data[l]; // put the pivot in the right place // copy the 2 new intervals to next level // but after the left sibling has done doing the same int left_done = sl_getp(done); workaround += left_done; // use the value, so the read doesn't get // optimized away. See comment for workaround. if (l < la) { // don't copy an interval of len=1 intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = l; intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = la; num_intervals[(level+1) % 2]++; } if (la + 1 < r) { // don't copy an interval of len=1 intervals[(level+1)%2][num_intervals[(level+1) % 2]].l = la + 1; intervals[(level+1)%2][num_intervals[(level+1) % 2]].r = r; num_intervals[(level+1) % 2]++; } //signal to the right sibling that I'm done sl_setp(done, 0); } }
int test(void) { int r; sl_create(,, -10,200,10, 0, , icount, sl_sharg(unsigned, count, 0), sl_glarg(unsigned, max, 20)); sl_sync(r); if (r != SVP_EXIT_BREAK) return 1; // should see break here if (sl_geta(count) != 20) return 1; sl_create(,,, sl_geta(count),,,, iprint, sl_sharg(unsigned, c, 0), sl_glarg(unsigned, refc, sl_geta(count))); sl_sync(); return 0; }
// // useafter4.c: this file is part of the SL toolchain. // // Copyright (C) 2009 The SL project. // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 3 // of the License, or (at your option) any later version. // // The complete GNU General Public Licence Notice can be found as the // `COPYING' file in the root directory. // sl_def(foo, void, sl_shparm(int, a), sl_glparm(int, b)) { } sl_enddef sl_def(t_main, void) { sl_create(,,,,,,, foo, sl_sharg(int, x), sl_glarg(int, y, 10)); sl_seta(x, 20); sl_sync(); int z = sl_geta(x) + sl_geta(y); } sl_enddef
READ_COUNTER(n)
READ_ARRAY_IN(sx, n)
READ_COUNTER(incx)
END_READ

// Nothing to prepare for this benchmark.
BEGIN_PREPARE
END_PREPARE

// Work phase: run the kernel FUNCTION on (n, sx, incx). The result is
// returned through the shared float argument `ret`, which starts at 0
// and is stored into the benchmark variable `nrm` after the sync.
BEGIN_WORK
  sl_create(,,,,,,,FUNCTION,
            sl_shfarg(FLOAT, ret, 0.),
            sl_glarg(long, , USE_VAR(n)),
            sl_glarg(const FLOAT*, , USE_VAR(sx)),
            sl_glarg(long, , USE_VAR(incx)));
  sl_sync();
  USE_VAR(nrm) = sl_geta(ret);
END_WORK

// Output phase: report the computed norm.
BEGIN_OUTPUT
  PRINT_SCALAR(nrm)
END_OUTPUT

// Teardown: release the input vector read above.
BEGIN_TEARDOWN
  FREE_ARRAY_IN(sx)
END_TEARDOWN

// Benchmark description. The kernel receives a single vector (sx), so
// the description states the Euclidean norm of X; the previous text
// mentioned a second vector Y that this benchmark does not have.
BEGIN_DESC
  BENCH_TITLE("BLAS: _NRM2")
  BENCH_AUTHOR("kena")
  BENCH_DESC("Compute SQRT(SUM(X[i]^2))")
END_DESC
// // The complete GNU General Public Licence Notice can be found as the // `COPYING' file in the root directory. // #include <svp/compiler.h> #include <svp/testoutput.h> noinline int foo(int*a) { barrier(); return a[42]; } // XIGNORE: ptl*:R sl_def(bar, void, sl_shparm(int, x)) { sl_index(i); int a[90000]; a[42] = 123; sl_setp(x, sl_getp(x) + foo(a) - i); } sl_enddef sl_def(t_main, void) { sl_create(,,23,23+1,,,, bar, sl_sharg(int, x, -58)); sl_sync(); output_int(sl_geta(x), 1); output_char('\n', 1); } sl_enddef
// The complete GNU General Public Licence Notice can be found as the // `COPYING' file in the root directory. // #include <stdio.h> sl_def(foo, void, sl_shparm(int, a)) { sl_setp(a, sl_getp(a) + 1); putchar('.'); } sl_enddef // 2009-04-02: FIXME: we acknowledge that muTC-ptl does not support // setting the shared after create fully yet; but we want slc's // testsuite to properly succeed. So we mark the test to ignore the // output on muTC-ptl: // XIGNORE: ptl*:D sl_def(t_main, void) { sl_create(,, 0, 10, 1, 0,, foo, sl_sharg(int, x)); sl_seta(x, 0); sl_sync(); printf("\n%d\n", sl_geta(x)); } sl_enddef
// modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 3 // of the License, or (at your option) any later version. // // The complete GNU General Public Licence Notice can be found as the // `COPYING' file in the root directory. // #include <svp/testoutput.h> sl_def(foo, void, sl_shfparm(double, sarg)) { sl_index(i); double x = sl_getp(sarg); double x2 = x; if (i == 0) sl_setp(sarg, x2); else sl_setp(sarg, x2+1); } sl_enddef sl_def(t_main, void) { sl_create(,,0,9,1,,, foo, sl_shfarg(double, s_in, 7.5)); sl_sync(); output_float(sl_geta(s_in), 1, 3); output_char('\n', 1); } sl_enddef
// // useafter3.c: this file is part of the SL toolchain. // // Copyright (C) 2009 Universiteit van Amsterdam. // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 3 // of the License, or (at your option) any later version. // // The complete GNU General Public Licence Notice can be found as the // `COPYING' file in the root directory. // sl_def(foo, void, sl_glparm(int, a)) { } sl_enddef sl_def(t_main, void) { sl_create(,,,,,,, foo, sl_glarg(int, x, 10)); sl_sync(); int z = sl_geta(x); } sl_enddef
{
  // NOTE(review): the sl_def header of this thread function is outside
  // the visible chunk; the body reads the parameters len_words, w and
  // out.

  // Run sha_main_outer over the input words with limit len_words and
  // step 16, threading the five hash words h0..h4 through the family
  // as shared arguments.
  sl_create(,,,sl_getp(len_words), 16,,, sha_main_outer,
            sl_glarg(const uint32_t*restrict, wg, sl_getp(w)),
            sl_sharg(unsigned long, h0),
            sl_sharg(unsigned long, h1),
            sl_sharg(unsigned long, h2),
            sl_sharg(unsigned long, h3),
            sl_sharg(unsigned long, h4));
  // Seed the hash state; these are the standard SHA-1 initial values.
  sl_seta(h0, 0x67452301L);
  sl_seta(h1, 0xEFCDAB89L);
  sl_seta(h2, 0x98BADCFEL);
  sl_seta(h3, 0x10325476L);
  sl_seta(h4, 0xC3D2E1F0L);
  sl_sync();

  // Store the final hash state as five 32-bit words; the unsigned long
  // values are converted to uint32_t by the assignments.
  uint32_t*restrict out = sl_getp(out);
  out[0] = sl_geta(h0);
  out[1] = sl_geta(h1);
  out[2] = sl_geta(h2);
  out[3] = sl_geta(h3);
  out[4] = sl_geta(h4);
}
sl_enddef

// Upper bound on the number of input chunks; each chunk is 16 words of
// 32 bits.
#define MAX_CHUNKS 10000
// Input buffer sized for MAX_CHUNKS chunks.
uint32_t data[16*MAX_CHUNKS];
// Result buffer for the five 32-bit hash words.
uint32_t output[5];

// Run-time parameter N, described as the problem size in 512-bit chunks.
slr_decl(slr_var(unsigned, N, "problem size (512-bit chunks)"));

// SLT_RUN: N=10