Esempio n. 1
0
 // Top-down radix pass over the n items of A, keyed on the low `bits` bits
 // of f.  Three cases:
 //   * bits <= MAX_RADIX: a single radixStep over 2^bits buckets;
 //   * at least BUCKETS+1 bucket sets available: one radixStep over BUCKETS
 //     buckets using the MAX_RADIX bits above bit (bits-MAX_RADIX), then a
 //     recursive call per bucket on the remaining bits;
 //   * otherwise: delegate to radixLoopBottomUp.
 // B and Tmp are scratch arrays parallel to A; BK holds numBK bucket sets.
 void radixLoopTopDown(E *A, E *B, bIndexT *Tmp, intT (*BK)[BUCKETS],
                       intT numBK, intT n, int bits, F f) {
   if (n == 0) return;

   // All remaining bits fit in one pass.
   if (bits <= MAX_RADIX) {
     radixStep(A, B, Tmp, BK, numBK, n, (intT)1 << bits, true,
               eBits<intT,E,F>(bits, 0, f));
     return;
   }

   // Too few bucket sets to give each sub-problem its own share.
   if (numBK < BUCKETS+1) {
     radixLoopBottomUp(A, B, Tmp, BK, numBK, n, bits, false, f);
     return;
   }

   radixStep(A, B, Tmp, BK, numBK, n, (intT)BUCKETS, true,
             eBits<intT,E,F>(MAX_RADIX, bits-MAX_RADIX, f));

   // radixStep leaves the start offset of every bucket in BK[0].
   intT* bucketStart = BK[0];

   // Bucket sets left over after reserving one per bucket plus the first,
   // expressed as a per-item ratio so they can be shared out by segment size.
   intT spare = numBK - BUCKETS - 1;
   float perItem = spare / (float) n;

   native::parallel_for(intT(0), intT(BUCKETS), [&] (intT b) {
     intT lo = bucketStart[b];
     intT hi = (b == BUCKETS-1) ? n : bucketStart[b+1];
     intT len = hi - lo;
     // The +b+1 / +b+2 terms give every bucket at least one bucket set.
     intT bkLo = ((intT) floor(lo * perItem)) + b + 1;
     intT bkHi = ((intT) floor(hi * perItem)) + b + 2;
     intT bkLen = bkHi - bkLo;
     radixLoopTopDown(A + lo, B + lo, Tmp + lo,
                      BK + bkLo, bkLen, len,
                      bits-MAX_RADIX, f);
   });
 }
Esempio n. 2
0
 // Integer-sorts the n items of A in place by the key f(item).
 //
 //   A             items to sort
 //   bucketOffsets if non-NULL, receives m entries; the code below stores in
 //                 entry k the index of the first item whose key is k (keys
 //                 with no items are filled in by sequence::scanIBack with a
 //                 min operator and the value n -- presumably yielding the
 //                 start of the next non-empty bucket; confirm against
 //                 scanIBack)
 //   n             number of items
 //   m             key range; keys are used as indices into bucketOffsets,
 //                 so f must return values below m
 //   bottomUp      selects radixLoopBottomUp instead of radixLoopTopDown
 //                 when more than MAX_RADIX bits are needed
 //   tmpSpace      scratch memory, carved into n E's, then n bIndexT's, then
 //                 the bucket sets
 //   f             key extraction function
 void iSort(E *A, intT* bucketOffsets, intT n, intT m, bool bottomUp,
            char* tmpSpace, F f) {

   typedef intT bucketsT[BUCKETS];

   int bits = utils::log2Up(m);
   intT numBK = 1+n/(BUCKETS*8);

   // the temporary space is broken into 3 parts: B, Tmp and BK.
   // The boundaries are computed with pointer arithmetic rather than byte
   // counts stored in intT: sizeof(E)*n can exceed the range of a 32-bit
   // intT for large inputs.
   E *B = (E*) tmpSpace;
   bIndexT *Tmp = (bIndexT*) (B + n); // one byte per item
   bucketsT *BK = (bucketsT*) (Tmp + n);

   if (bits <= MAX_RADIX) {
     // a single pass suffices, and it leaves the bucket offsets in BK[0]
     radixStep(A, B, Tmp, BK, numBK, n, (intT) 1 << bits, true, eBits<intT,E,F>(bits,0,f));
     if (bucketOffsets != NULL) {
       native::parallel_for(intT(0), m, [&] (intT i) {
         bucketOffsets[i] = BK[0][i];
       });
     }
     return;
   } else if (bottomUp)
     radixLoopBottomUp(A, B, Tmp, BK, numBK, n, bits, true, f);
   else
     radixLoopTopDown(A, B, Tmp, BK, numBK, n, bits, f);

   if (bucketOffsets != NULL) {
     // rebuild the offsets from the sorted output: start from "empty"
     // (offset n), then mark every position where the key changes
     { native::parallel_for(intT(0), m, [&] (intT i) { bucketOffsets[i] = n; }); }
     // With no items there is no A[0] to inspect, and n-1 would wrap
     // around for an unsigned intT, so skip the boundary detection.
     if (n > 0) {
       { native::parallel_for(intT(0), n-1, [&] (intT i) {
         intT v = f(A[i]);
         intT vn = f(A[i+1]);
         if (v != vn) bucketOffsets[vn] = i+1;
       }); }
       bucketOffsets[f(A[0])] = 0;
     }
     sequence::scanIBack(bucketOffsets, bucketOffsets, (intT) m,
                         utils::minF<intT>(), (intT) n);
   }
 }
Esempio n. 3
0
// Evaluates one octave of 1-D noise at position t by interpolating between
// the noise values at the two integer positions surrounding t.
//
// t         sample position
// octave    forwarded unchanged to getNoise()
// UInt32    (third parameter; its name hides the type inside this function)
//           interpolation selector: PERLIN_INTERPOLATE_COSINE or
//           PERLIN_INTERPOLATE_LINEAR; any other value yields 0
// Smoothing when true, each lattice value is replaced by a 1/4-1/2-1/4
//           weighted average of itself and its two neighbours
Real32 interpolatedNoise(Real32 t, UInt32 octave, UInt32 UInt32, bool Smoothing)
{
	Real32 cell(osgFloor(t));
	Real32 frac = t - cell;

	Real32 v1,v2;
	if(!Smoothing)
	{
		v1 = getNoise(cell,octave);
		v2 = getNoise(cell + 1.0f,octave);
	}
	else
	{
		v1 = getNoise(cell,octave)/2.0f + getNoise(cell - 1.0f, octave)/4.0f + getNoise(cell + 1.0f, octave)/4.0f;

		// same weighted average, centred one lattice point further on
		Real32 next = cell + 1.0f;
		v2 = getNoise(next,octave)/2.0f + getNoise(next - 1.0f, octave)/4.0f + getNoise(next + 1.0f, octave)/4.0f;
	}

	if(UInt32 == PERLIN_INTERPOLATE_COSINE)
	{
		return interpolateCosine(v1 , v2 , frac);
	}
	if(UInt32 == PERLIN_INTERPOLATE_LINEAR)
	{
		return interpolateLinear(v1 , v2 , frac);
	}

	// unrecognised interpolation selector
	return Real32(0.0);
}
Esempio n. 4
0
 // One radix pass over the n items of A into m buckets, keyed by extract.
 // The input is cut into blocks that are processed by radixBlock through
 // native::parallel_for, after which the per-block counts are transposed,
 // scanned into offsets, and used by blockTrans to move the items from B
 // back into A.  With fewer than two blocks the work is handed to
 // radixStepSerial instead.  `top` selects sequence::scan over
 // sequence::scanSerial for the offset scan.  On return from the blocked
 // path BK[0][j] holds the starting offset of bucket j.
 void radixStep(E* A, E* B, bIndexT *Tmp, intT (*BK)[BUCKETS],
                intT numBK, intT n, intT m, bool top, F extract) {

   // need 3 bucket sets per block
   int expand = (sizeof(E)<=4) ? 64 : 32;
   intT blocks = min(numBK/3,(1+n/(BUCKETS*expand)));

   if (blocks < 2) {
     radixStepSerial(A, B, Tmp, BK[0], n, m, extract);
     return;
   }

   // items per block, rounded up
   intT perBlock = (n+blocks-1)/blocks;

   // the three bucket-set regions carved out of BK
   intT* counts = (intT*) BK;
   intT* offsA = (intT*) (BK+blocks);
   intT* offsB = (intT*) (BK+2*blocks);

   native::parallel_for(intT(0), blocks, [&] (intT blk) {
     intT start = blk*perBlock;
     // trailing blocks may be short or empty
     intT len = min(max<intT>(n-start,0),perBlock);
     radixBlock(A+start, B, Tmp+start, counts + m*blk, offsB + m*blk,
                start, len, m, extract);
   });

   transpose<intT,intT>(counts, offsA).trans(blocks, m);

   intT ss;
   if (!top)
     ss = sequence::scanSerial(offsA, offsA, blocks*m, utils::addF<intT>(),(intT)0);
   else
     ss = sequence::scan(offsA, offsA, blocks*m, utils::addF<intT>(),(intT)0);
   //utils::myAssert(ss == n, "radixStep: sizes don't match");

   blockTrans<E,intT>(B, A, offsB, offsA, counts).trans(blocks, m);

   // put the offsets for each bucket in the first bucket set of BK
   intT* firstSet = BK[0];
   for (intT bucket = 0; bucket < m; bucket++) {
     firstSet[bucket] = offsA[bucket*blocks];
   }
 }
Esempio n. 5
0
 // Stores v into each of the n entries A[0..n-1] via native::parallel_for.
 // Kept as a routine of its own because of Cilk bugs.
 static void clearA(eType* A, intT n, eType v) {
   native::parallel_for(intT(0), n, [&] (intT idx) {
     A[idx] = v;
   });
 }
Esempio n. 6
0
// Runs the multi-threaded __rw_atomic_exchange test for the integer type
// intT and verifies the final values of the two shared variables.
//
// The unnamed first parameter only selects the type under test.  tag is
// stored in Args::type_tag_; it is unused in non-reentrant builds, where
// the function does nothing beyond the enabled/disabled check.
void run_test (intT, thr_args_base::tag_t tag)
{
    static const char* const tname = rw_any_t (intT ()).type_name ();

    if (!rw_enabled (tname)) {
        rw_note (0, 0, 0, "%s test disabled", tname);
        return;
    }

#ifdef _RWSTD_REENTRANT

    static const char* const fun = "__rw_atomic_exchange";

    rw_info (0, 0, 0, "__rw::%s (%s&, %2$s): %d iterations in %d threads",
             fun, tname, rw_opt_nloops, rw_opt_nthreads);

    rw_thread_t tid [MAX_THREADS];

    typedef thr_args<intT> Args;

    Args::nthreads_   = unsigned (rw_opt_nthreads);
    Args::type_tag_   = tag;
    Args::nincr_      = unsigned (rw_opt_nloops);
    // both shared variables start at 1 because 0 is the "lock" value
    Args::shared_ [0] = intT (1);
    Args::shared_ [1] = intT (1);

    _RWSTD_ASSERT (Args::nthreads_ < sizeof tid / sizeof *tid);

    Args args [sizeof tid / sizeof *tid];

    for (unsigned long i = 0; i != Args::nthreads_; ++i) {

        args [i].threadno_ = i;
        args [i].niter_    = 0;
        args [i].nxchg_    = 0;

        rw_fatal (0 == rw_thread_create (tid + i, 0, thread_routine, args + i),
                  0, __LINE__, "thread_create() failed");
    }
            
    for (unsigned long i = 0; i != Args::nthreads_; ++i) {

        rw_error (0 == rw_thread_join (tid [i], 0), 0, __LINE__,
                  "thread_join() failed");

        if (args [i].niter_) {
            // compute the percentage of thread iterations that resulted
            // in increments of one of the shared variables
            const unsigned long incrpcnt =
                (100U * Args::nincr_) / args [i].niter_;

            printf ("thread %lu performed %lu exchanges in %lu iterations "
                    "(%lu%% increments)\n",
                    args [i].threadno_, args [i].nxchg_,
                    args [i].niter_, incrpcnt);
        }
    }

    // thread_routine() makes thread k perform nincr_ increments of
    // shared_ [k % 2], and threads are numbered 0 .. nthreads_ - 1, so
    // shared_ [0] is updated by the even-numbered threads and shared_ [1]
    // by the odd-numbered ones; the two counts differ when the number of
    // threads is odd, so each variable gets its own expected value
    const unsigned long nthr_even = (Args::nthreads_ + 1U) / 2U;
    const unsigned long nthr_odd  = Args::nthreads_ / 2U;

    // compute the expected result, "skipping" zeros by incrementing
    // expect twice when it overflows and wraps around to 0 (zero is
    // used as the lock variable in thread_routine())
    intT expect = intT (1);

    const unsigned long nincr0 = nthr_even * Args::nincr_;
        
    for (unsigned long i = 0; i != nincr0; ++i) {
        if (intT () == ++expect)
            ++expect;
    }

    // verify that the final value of the first shared variable equals
    // the number of increments performed on it by the threads
    rw_assert (Args::shared_ [0] == expect, 0, __LINE__,
               "1. %s (%s&, %2$s); %s == %s failed",
               fun, tname, TOSTR (Args::shared_ [0]), TOSTR (expect));

    // same computation for the second shared variable
    expect = intT (1);

    const unsigned long nincr1 = nthr_odd * Args::nincr_;

    for (unsigned long i = 0; i != nincr1; ++i) {
        if (intT () == ++expect)
            ++expect;
    }

    rw_assert (Args::shared_ [1] == expect, 0, __LINE__,
               "2. %s (%s&, %2$s); %s == %s failed",
               fun, tname, TOSTR (Args::shared_ [1]), TOSTR (expect));

#else   // if !defined (_RWSTD_REENTRANT)

    _RWSTD_UNUSED (tag);

#endif   // _RWSTD_REENTRANT
}
Esempio n. 7
0
// Thread body for the atomic exchange test.
//
// Performs args->nincr_ increments of args->shared_ [threadno_ % 2] using
// only exchange(): the value intT() is swapped in as a "lock", the value
// that was swapped out is incremented (skipping intT()), and the result is
// swapped back in.  args->niter_ counts attempts and args->nxchg_ counts
// calls to exchange().  Always returns 0; errors are reported with printf()
// and by setting the function-local `failed' flag, which also makes the
// loops in other threads running this routine terminate.
void* thread_routine (thr_args<intT> *args)
{
    // each thread operates on one of two shared values to exercise
    // problems due to operating on adjacent bytes or half-words
    const unsigned long slot = args->threadno_ % 2;

    static volatile int failed;

    // one pass of the outer loop per requested increment
    for (unsigned long incr = 0; incr != args->nincr_ && !failed; ++incr) {

        // keep trying until the increment succeeds or some thread fails
        for (unsigned long attempt = 0; !failed; ++attempt) {

            ++args->niter_;

            // swap in the special "lock" value intT()
            const intT prev = exchange (args->shared_ [slot], intT ());

            ++args->nxchg_;

            if (intT () == prev) {

                // the variable is locked by another thread: try again
                // unless the number of failed attempts has reached the
                // requested number of increments * 100 (an arbitrary
                // number)
                if (100UL * args->nincr_ != attempt)
                    continue;

                // give up, setting the shared failed variable to
                // prevent deadlock
                printf ("*** line %d: error thread %lu \"timed out\" after "
                        "%lu increments and %lu iterations\n",
                        __LINE__, args->threadno_, incr, args->niter_);
                failed = 1;
                return 0;
            }

            // this thread now holds the lock: compute the incremented
            // value, stepping over the special "lock" value
            intT next = intT (prev + 1);

            if (intT () == next)
                ++next;

            const intT lock = exchange (args->shared_ [slot], next);

            ++args->nxchg_;

            if (intT () != lock) {

                // the value swapped out must have been the "lock" value;
                // anything else means the exchange misbehaved, so set the
                // shared failed variable (to prevent deadlock) and bail

                printf ("*** line %d: error: thread %lu failed "
                        "at increment %lu after %lu iterations\n",
                        __LINE__, args->threadno_, incr, args->niter_);
                failed = 1;
                return 0;
            }

            // increment done, move on to the next one
            break;
        }
    }

    return 0;
}