void radixLoopTopDown(E *A, E *B, bIndexT *Tmp, intT (*BK)[BUCKETS], intT numBK, intT n, int bits, F f) { if (n == 0) return; if (bits <= MAX_RADIX) { radixStep(A, B, Tmp, BK, numBK, n, (intT)1 << bits, true, eBits<intT,E,F>(bits,0,f)); } else if (numBK >= BUCKETS+1) { radixStep(A, B, Tmp, BK, numBK, n, (intT)BUCKETS, true, eBits<intT,E,F>(MAX_RADIX,bits-MAX_RADIX,f)); intT* offsets = BK[0]; intT remain = numBK - BUCKETS - 1; float y = remain / (float) n; native::parallel_for(intT(0), intT(BUCKETS), [&] (intT i) { intT segOffset = offsets[i]; intT segNextOffset = (i == BUCKETS-1) ? n : offsets[i+1]; intT segLen = segNextOffset - segOffset; intT blocksOffset = ((intT) floor(segOffset * y)) + i + 1; intT blocksNextOffset = ((intT) floor(segNextOffset * y)) + i + 2; intT blockLen = blocksNextOffset - blocksOffset; radixLoopTopDown(A + segOffset, B + segOffset, Tmp + segOffset, BK + blocksOffset, blockLen, segLen, bits-MAX_RADIX, f); }); } else { radixLoopBottomUp(A, B, Tmp, BK, numBK, n, bits, false, f); } }
void iSort(E *A, intT* bucketOffsets, intT n, intT m, bool bottomUp, char* tmpSpace, F f) { typedef intT bucketsT[BUCKETS]; int bits = utils::log2Up(m); intT numBK = 1+n/(BUCKETS*8); // the temporary space is broken into 3 parts: B, Tmp and BK E *B = (E*) tmpSpace; intT Bsize =sizeof(E)*n; bIndexT *Tmp = (bIndexT*) (tmpSpace+Bsize); // one byte per item intT tmpSize = sizeof(bIndexT)*n; bucketsT *BK = (bucketsT*) (tmpSpace+Bsize+tmpSize); if (bits <= MAX_RADIX) { radixStep(A, B, Tmp, BK, numBK, n, (intT) 1 << bits, true, eBits<intT,E,F>(bits,0,f)); if (bucketOffsets != NULL) { native::parallel_for(intT(0), m, [&] (intT i) { bucketOffsets[i] = BK[0][i]; }); } return; } else if (bottomUp) radixLoopBottomUp(A, B, Tmp, BK, numBK, n, bits, true, f); else radixLoopTopDown(A, B, Tmp, BK, numBK, n, bits, f); if (bucketOffsets != NULL) { { native::parallel_for(intT(0), m, [&] (intT i) { bucketOffsets[i] = n; }); } { native::parallel_for(intT(0), n-1, [&] (intT i) { intT v = f(A[i]); intT vn = f(A[i+1]); if (v != vn) bucketOffsets[vn] = i+1; }); } bucketOffsets[f(A[0])] = 0; sequence::scanIBack(bucketOffsets, bucketOffsets, (intT) m, utils::minF<intT>(), (intT) n); } }
// Evaluates one octave of 1-D noise at position t by interpolating between
// the noise values at the two integer positions surrounding t.
//
//   t         : sample position
//   octave    : octave index forwarded to getNoise
//   UInt32    : interpolation selector, compared against
//               PERLIN_INTERPOLATE_COSINE and PERLIN_INTERPOLATE_LINEAR.
//               NOTE: this parameter's name hides the type of the same name
//               inside the function body.
//   Smoothing : when true each endpoint is a 1/4, 1/2, 1/4 weighted blend of
//               the noise at that position and its two neighbours
//
// Returns the interpolated value, or 0 when the selector matches neither
// known interpolation constant.
Real32 interpolatedNoise(Real32 t, UInt32 octave, UInt32 UInt32, bool Smoothing)
{
    const Real32 lower(osgFloor(t));
    const Real32 upper = lower + 1.0f;
    const Real32 fraction = t - lower;

    Real32 lowerValue;
    Real32 upperValue;

    if(!Smoothing)
    {
        lowerValue = getNoise(lower, octave);
        upperValue = getNoise(upper, octave);
    }
    else
    {
        lowerValue = getNoise(lower,octave)/2.0f
                   + getNoise(lower - 1.0f, octave)/4.0f
                   + getNoise(lower + 1.0f, octave)/4.0f;

        upperValue = getNoise(upper,octave)/2.0f
                   + getNoise(upper - 1.0f, octave)/4.0f
                   + getNoise(upper + 1.0f, octave)/4.0f;
    }

    if(UInt32 == PERLIN_INTERPOLATE_COSINE)
    {
        return interpolateCosine(lowerValue, upperValue, fraction);
    }

    if(UInt32 == PERLIN_INTERPOLATE_LINEAR)
    {
        return interpolateLinear(lowerValue, upperValue, fraction);
    }

    // unrecognised selector
    return Real32(0.0);
}
void radixStep(E* A, E* B, bIndexT *Tmp, intT (*BK)[BUCKETS], intT numBK, intT n, intT m, bool top, F extract) { // need 3 bucket sets per block int expand = (sizeof(E)<=4) ? 64 : 32; intT blocks = min(numBK/3,(1+n/(BUCKETS*expand))); if (blocks < 2) { radixStepSerial(A, B, Tmp, BK[0], n, m, extract); return; } intT nn = (n+blocks-1)/blocks; intT* cnts = (intT*) BK; intT* oA = (intT*) (BK+blocks); intT* oB = (intT*) (BK+2*blocks); native::parallel_for(intT(0), blocks, [&] (intT i) { intT od = i*nn; intT nni = min(max<intT>(n-od,0),nn); radixBlock(A+od, B, Tmp+od, cnts + m*i, oB + m*i, od, nni, m, extract); }); transpose<intT,intT>(cnts, oA).trans(blocks, m); intT ss; if (top) ss = sequence::scan(oA, oA, blocks*m, utils::addF<intT>(),(intT)0); else ss = sequence::scanSerial(oA, oA, blocks*m, utils::addF<intT>(),(intT)0); //utils::myAssert(ss == n, "radixStep: sizes don't match"); blockTrans<E,intT>(B, A, oB, oA, cnts).trans(blocks, m); // put the offsets for each bucket in the first bucket set of BK for (intT j = 0; j < m; j++) BK[0][j] = oA[j*blocks]; }
// needs to be in separate routine due to Cilk bugs static void clearA(eType* A, intT n, eType v) { native::parallel_for(intT(0), n, [&] (intT i) { A[i] = v; }); }
// Exercises __rw_atomic_exchange on objects of type intT.
//
// The unnamed first parameter only selects the type; `tag` is stored in
// thr_args<intT>::type_tag_.  In reentrant builds the function starts
// rw_opt_nthreads threads running thread_routine, joins them, prints
// per-thread statistics, and verifies the final values of the two shared
// variables.  In non-reentrant builds it does nothing beyond the
// enabled-check.
void run_test (intT, thr_args_base::tag_t tag)
{
    static const char* const tname = rw_any_t (intT ()).type_name ();

    if (!rw_enabled (tname)) {
        rw_note (0, 0, 0, "%s test disabled", tname);
        return;
    }

#ifdef _RWSTD_REENTRANT

    static const char* const fun = "__rw_atomic_exchange";

    rw_info (0, 0, 0, "__rw::%s (%s&, %2$s): %d iterations in %d threads",
             fun, tname, rw_opt_nloops, rw_opt_nthreads);

    rw_thread_t tid [MAX_THREADS];

    typedef thr_args<intT> Args;

    Args::nthreads_   = unsigned (rw_opt_nthreads);
    Args::type_tag_   = tag;
    Args::nincr_      = unsigned (rw_opt_nloops);
    Args::shared_ [0] = intT (1);
    Args::shared_ [1] = intT (1);

    // the arrays below hold exactly MAX_THREADS entries, so a thread
    // count equal to the capacity is still valid
    _RWSTD_ASSERT (Args::nthreads_ <= sizeof tid / sizeof *tid);

    Args args [sizeof tid / sizeof *tid];

    for (unsigned long i = 0; i != Args::nthreads_; ++i) {

        args [i].threadno_ = i;
        args [i].niter_    = 0;
        args [i].nxchg_    = 0;

        rw_fatal (0 == rw_thread_create (tid + i, 0, thread_routine, args + i),
                  0, __LINE__, "thread_create() failed");
    }

    for (unsigned long i = 0; i != Args::nthreads_; ++i) {

        rw_error (0 == rw_thread_join (tid [i], 0), 0, __LINE__,
                  "thread_join() failed");

        if (args [i].niter_) {
            // compute the percentage of thread iterations that resulted
            // in increments of one of the shared variables (the product
            // is formed in unsigned long to avoid wrapping in unsigned)
            const unsigned long incrpcnt =
                (100UL * Args::nincr_) / args [i].niter_;

            printf ("thread %lu performed %lu exchanges in %lu iterations "
                    "(%lu%% increments)\n",
                    args [i].threadno_, args [i].nxchg_,
                    args [i].niter_, incrpcnt);
        }
    }

    // thread number k operates on shared_ [k % 2] (see thread_routine()),
    // so shared_ [0] is incremented by the even-numbered threads and
    // shared_ [1] by the odd-numbered ones; with an odd number of threads
    // the two variables receive a different number of increments
    const unsigned long nthreads   = Args::nthreads_;
    const unsigned long nincr_even = ((nthreads + 1UL) / 2UL) * Args::nincr_;
    const unsigned long nincr_odd  = (nthreads / 2UL) * Args::nincr_;

    // compute each expected result, "skipping" zeros by incrementing
    // expect twice when it overflows and wraps around to 0 (zero is
    // used as the lock variable in thread_routine())

    {
        intT expect = intT (1);

        for (unsigned long i = 0; i != nincr_even; ++i) {
            if (intT () == ++expect)
                ++expect;
        }

        // verify that the final value of the first shared variable
        // equals the number of increments performed on it
        rw_assert (Args::shared_ [0] == expect, 0, __LINE__,
                   "1. %s (%s&, %2$s); %s == %s failed",
                   fun, tname, TOSTR (Args::shared_ [0]), TOSTR (expect));
    }

    {
        intT expect = intT (1);

        for (unsigned long i = 0; i != nincr_odd; ++i) {
            if (intT () == ++expect)
                ++expect;
        }

        // same check for the second shared variable
        rw_assert (Args::shared_ [1] == expect, 0, __LINE__,
                   "2. %s (%s&, %2$s); %s == %s failed",
                   fun, tname, TOSTR (Args::shared_ [1]), TOSTR (expect));
    }

#else   // if !defined (_RWSTD_REENTRANT)

    _RWSTD_UNUSED (tag);

#endif   // _RWSTD_REENTRANT
}
// Thread entry point: performs args->nincr_ increments of one of the two
// shared values, using exchange() with intT() as a "lock" marker.
//
// Per increment the thread repeatedly swaps intT() into the shared value
// until it gets back a non-lock value, then swaps in that value plus one
// (skipping intT() itself).  Any inconsistency, or too many unsuccessful
// attempts, sets a flag shared by all threads so that they stop.
// Always returns 0.
void* thread_routine (thr_args<intT> *args)
{
    // each thread operates on one of two shared values to exercise
    // problems due to operating on adjacent bytes or half-words
    const unsigned long slot = args->threadno_ % 2;

    // set to non-zero by the first thread that detects a failure
    static volatile int failed;

    for (unsigned long incr = 0; incr != args->nincr_ && !failed; ++incr) {

        for (unsigned long attempt = 0; !failed; ++attempt) {

            // count this iteration and the exchange that follows
            ++args->niter_;

            // try to take the "lock" by swapping in intT()
            const intT prev = exchange (args->shared_ [slot], intT ());

            ++args->nxchg_;

            if (intT () == prev) {
                // the variable is locked by another thread: retry unless
                // the number of attempts has reached the requested number
                // of increments * 100 (an arbitrary number)
                if (100UL * args->nincr_ != attempt)
                    continue;

                printf ("*** line %d: error thread %lu \"timed out\" after "
                        "%lu increments and %lu iterations\n",
                        __LINE__, args->threadno_, incr, args->niter_);

                // set the shared flag to prevent deadlock
                failed = 1;
                return 0;
            }

            // the variable was not locked: compute its successor,
            // stepping over the special "lock" value of intT()
            intT next = intT (prev + 1);

            if (intT () == next)
                ++next;

            // store the new value, releasing the "lock"
            const intT released = exchange (args->shared_ [slot], next);

            ++args->nxchg_;

            // the value swapped out must be the special "lock" value
            if (intT () == released)
                break;

            printf ("*** line %d: error: thread %lu failed "
                    "at increment %lu after %lu iterations\n",
                    __LINE__, args->threadno_, incr, args->niter_);

            // set the shared flag to prevent deadlock
            failed = 1;
            return 0;
        }
    }

    return 0;
}