void do_work(const Iter first, const Iter last, size_t thread_index) { using namespace std; auto mean = calc_mean(first, last); local_results[thread_index] = mean; b.wait(); if (thread_index == num_threads - 1) { cout << "local means: "; print_all(local_results.begin(), local_results.end()); global_mean = std::accumulate(local_results.begin(), local_results.end(), 0) / num_threads; cout << "global mean: " << global_mean << endl; // TODO: local mean of this (master) thread } b.wait(); const auto variance = calc_variance(first, last, global_mean); local_results[thread_index] = variance; b.wait(); if(thread_index == num_threads - 1) { cout << "local variance: "; print_all(local_results.begin(), local_results.end()); cout << endl; global_deviation = sqrt(std::accumulate(local_results.begin(), local_results.end(), 0)/num_threads); } }
Error operator()(AsyncLoaderTaskContext& ctx) { if(m_count) { auto x = m_count->fetchAdd(1); if(m_id >= 0) { if(m_id != static_cast<I32>(x)) { ANKI_LOGE("Wrong excecution order"); return ErrorCode::FUNCTION_FAILED; } } } if(m_sleepTime != 0.0) { HighRezTimer::sleep(m_sleepTime); } if(m_barrier) { m_barrier->wait(); } ctx.m_pause = m_pause; ctx.m_resubmitTask = m_resubmit; m_resubmit = false; return ErrorCode::NONE; }
/** Code to execute in the thread.
 * Executes loop() in each cycle. This is the default implementation and if
 * you need a more specific behaviour you can override this run() method and
 * ignore loop().
 * Although this method is declared virtual, it should not be overridden, other
 * than with the following trivial snippet:
 * @code
 * protected: virtual void run() { Thread::run(); }
 * @endcode
 * The reason not to make other changes is that it contains complex
 * housekeeping code that the system relies on. The reason for still allowing
 * the override is solely to make reading backtraces in your debugger easier,
 * because then the class name of the thread sub-class will appear in the
 * backtrace, while it would not otherwise.
 *
 * Summary of one cycle, as implemented below:
 *  - run loop() under loop_mutex unless finalize_prepared is set,
 *  - set __loop_done under __loop_done_mutex and wake all waiters on
 *    __loop_done_waitcond,
 *  - call test_cancel(),
 *  - in OPMODE_WAITFORWAKEUP: wait on the pending barrier (if one is set),
 *    then block until __pending_wakeups is non-zero and consume one wakeup,
 *  - yield().
 */
void Thread::run()
{
  if ( __op_mode == OPMODE_WAITFORWAKEUP ) {
    // Wait for initial wakeup
    // __sleep_mutex has been locked in entry() already!
    // The predicate is re-checked in a loop after every return from wait().
    while (__pending_wakeups == 0) {
      __waiting_for_wakeup = true;
      __sleep_condition->wait();
    }
    // Consume exactly one wakeup, then release the mutex taken in entry().
    __pending_wakeups -= 1;
    __sleep_mutex->unlock();
  }

  forever {
    // NOTE(review): presumably lets a thread that wants to interrupt the loop
    // get in before loop_mutex is taken again -- confirm against stopby().
    loopinterrupt_antistarve_mutex->stopby();

    // loop() only ever runs with loop_mutex held, and is skipped once
    // finalization has been prepared.
    loop_mutex->lock();
    if ( ! finalize_prepared ) {
      __loop_done = false;
      loop();
    }
    loop_mutex->unlock();

    // Flag the iteration as done and notify everybody waiting for that.
    // Note that this also happens when loop() was skipped above.
    __loop_done_mutex->lock();
    __loop_done = true;
    __loop_done_mutex->unlock();
    __loop_done_waitcond->wake_all();

    test_cancel();
    if ( __op_mode == OPMODE_WAITFORWAKEUP ) {
      if ( __barrier ) {
        // Take the barrier pointer and clear the member while holding
        // __sleep_mutex, but wait on the barrier with the mutex released.
        __sleep_mutex->lock();
        Barrier *b = __barrier;
        __barrier = NULL;
        __sleep_mutex->unlock();

        b->wait();

        // Both branches leave this if/else with __sleep_mutex locked.
        __sleep_mutex->lock();
      } else {
        __sleep_mutex->lock();
      }

      // Sleep until at least one wakeup is pending, then consume it.
      while (__pending_wakeups == 0) {
        __waiting_for_wakeup = true;
        __sleep_condition->wait();
      }
      __pending_wakeups -= 1;
      __sleep_mutex->unlock();
    }
    yield();
  }
}
/*
 * Invoked by our "fake" BpGraphicBufferProducer. The transaction code, the
 * incoming data and the reply Parcel are stashed in this object and then
 * handed over to the thread that services the looper. If that thread has
 * already exited, the message is handled synchronously on the caller's
 * thread instead.
 *
 * Always returns NO_ERROR; `flags` is not used.
 */
virtual status_t transact(uint32_t code, const Parcel& data, Parcel* reply, uint32_t flags)
{
    // Publish the request so the message handler can pick it up.
    this->code = code;
    this->data = &data;
    this->reply = reply;
    android_atomic_acquire_store(0, &memoryBarrier);

    if (!exitPending) {
        // Close the barrier *before* posting the message, then block on it
        // after the message has been handed to the looper.
        barrier.close();
        looper->sendMessage(this, Message(MSG_API_CALL));
        barrier.wait();
        return NO_ERROR;
    }

    // The servicing thread is gone: run the message right here.
    handleMessage(Message(MSG_API_CALL));
    return NO_ERROR;
}
Error operator()(AsyncLoaderTaskContext& ctx) { void* mem = m_alloc.allocate(10); if(!mem) return ErrorCode::FUNCTION_FAILED; HighRezTimer::sleep(0.1); m_alloc.deallocate(mem, 10); if(m_barrier) { m_barrier->wait(); } return ErrorCode::NONE; }
/// Called by a worker thread once it has finished its share of @p task.
/// The thread that brings the done-counter up to m_totalThreads notifies the
/// task, resets the counter and pops the task from the queue, signalling
/// m_tasksComplete when the queue becomes empty. Every caller then waits on
/// m_barrier, after the counter mutex has been released.
void retireTask(Task* task)
{
    {
        // The counter mutex is held for the whole bookkeeping section and
        // released at the end of this scope, before the barrier wait.
        std::unique_lock<std::mutex> counterLock(m_threadCounterMutex);

        ++m_threadsDoneCount;
        if (m_threadsDoneCount >= m_totalThreads)
        {
            task->notifyComplete();
            m_threadsDoneCount = 0;

            // The queue mutex is taken while the counter mutex is still held.
            std::unique_lock<std::mutex> queueLock(m_queueMutex);
            m_tasks.pop();
            if (m_tasks.empty())
            {
                m_tasksComplete.notify_all();
            }
        }
    }

    m_barrier.wait();
}
//
// All hardware threads start execution here
//
// Every strand builds its own rasterizer, render target and shader objects,
// then takes part in a single frame that consists of two barrier-separated
// phases: a geometry phase that fills the shared gVertexParams array, and a
// pixel phase that renders the frame buffer tile by tile.
// The model and shader flavor are selected at compile time through
// DRAW_TORUS / DRAW_CUBE / DRAW_TEAPOT and GOURAND_SHADER.
//
int main()
{
	Fiber::initSelf();

#if 0
	Core::current()->addFiber(new TestFiber('0' + Core::currentStrandId()));
	while (true)
		Core::reschedule();
#endif

	render::Rasterizer rasterizer(kFbWidth, kFbHeight);
	render::RenderTarget renderTarget;
	renderTarget.setColorBuffer(&gColorBuffer);
	renderTarget.setZBuffer(&gZBuffer);

	// Pick the shader pair and the vertex/index arrays for the selected model.
#if DRAW_TORUS
#if GOURAND_SHADER
	GourandVertexShader vertexShader;
	GourandPixelShader pixelShader(&renderTarget);
#else
	PhongVertexShader vertexShader;
	PhongPixelShader pixelShader(&renderTarget);
#endif

	const float *vertices = kTorusVertices;
	int numVertices = kNumTorusVertices;
	const int *indices = kTorusIndices;
	int numIndices = kNumTorusIndices;
#elif DRAW_CUBE
	TextureVertexShader vertexShader;
	TexturePixelShader pixelShader(&renderTarget);
	pixelShader.bindTexture(&texture);
	const float *vertices = kCubeVertices;
	int numVertices = kNumCubeVertices;
	const int *indices = kCubeIndices;
	int numIndices = kNumCubeIndices;
#elif DRAW_TEAPOT
#if GOURAND_SHADER
	GourandVertexShader vertexShader;
	GourandPixelShader pixelShader(&renderTarget);
#else
	PhongVertexShader vertexShader;
	PhongPixelShader pixelShader(&renderTarget);
#endif

	const float *vertices = kTeapotVertices;
	int numVertices = kNumTeapotVertices;
	const int *indices = kTeapotIndices;
	int numIndices = kNumTeapotIndices;
#endif

	// Projection matrix: x is scaled by the inverse aspect ratio and the
	// last row copies z into w.
	const float kAspectRatio = float(kFbWidth) / float(kFbHeight);
	const float kProjCoeff[4][4] = {
		{ 1.0f / kAspectRatio, 0.0, 0.0, 0.0 },
		{ 0.0, 1.0, 0.0, 0.0 },
		{ 0.0, 0.0, 1.0, 0.0 },
		{ 0.0, 0.0, 1.0, 0.0 },
	};

	vertexShader.setProjectionMatrix(Matrix(kProjCoeff));

	// Initial, model specific placement of the object.
#if DRAW_TORUS
	vertexShader.applyTransform(translate(0.0f, 0.0f, 1.5f));
	vertexShader.applyTransform(rotateAboutAxis(M_PI / 3.5, 0.707f, 0.707f, 0.0f));
#elif DRAW_CUBE
	vertexShader.applyTransform(translate(0.0f, 0.0f, 2.0f));
	vertexShader.applyTransform(rotateAboutAxis(M_PI / 3.5, 0.707f, 0.707f, 0.0f));
#elif DRAW_TEAPOT
	vertexShader.applyTransform(translate(0.0f, 0.1f, 0.25f));
	vertexShader.applyTransform(rotateAboutAxis(M_PI, -1.0f, 0.0f, 0.0f));
#endif

	// Rotation that is applied to the vertex shader once per frame.
	Matrix rotateStepMatrix(rotateAboutAxis(M_PI / 8, 0.707f, 0.707f, 0.0f));

	pixelShader.enableZBuffer(true);
//	pixelShader.enableBlend(true);

	// Strand 0 allocates the shared vertex parameter buffer; the barrier
	// below keeps the other strands from using it before that.
	// NOTE(review): the buffer holds a fixed 16384 floats and nothing here
	// checks that numVertices * getNumParams() fits -- confirm for all models.
	if (Core::currentStrandId() == 0)
		gVertexParams = (float*) allocMem(16384 * sizeof(float));

	gInitBarrier.wait();
	int numVertexParams = vertexShader.getNumParams();

	for (int frame = 0; frame < 1; frame++)
	{
		//
		// Geometry phase.  Statically assign groups of 16 vertices to threads.  Although these may be
		// handled in arbitrary order, they are put into gVertexParams in proper order (this is a sort
		// middle architecture, and gVertexParams is in the middle).
		//
		int vertexIndex = Core::currentStrandId() * 16;
		while (vertexIndex < numVertices)
		{
			// The last argument is the number of vertices remaining from
			// vertexIndex on, not a fixed 16.
			// NOTE(review): presumably processVertices() handles at most 16
			// vertices per call -- confirm.
			vertexShader.processVertices(gVertexParams + vertexShader.getNumParams() * vertexIndex,
				vertices + vertexShader.getNumAttribs() * vertexIndex, numVertices - vertexIndex);
			vertexIndex += 16 * kNumCores * kHardwareThreadsPerCore;
		}

		// Reset the shared tile counter before anybody enters the pixel phase.
		if (Core::currentStrandId() == 0)
			gNextTileIndex = 0;

		vertexShader.applyTransform(rotateStepMatrix);
		gGeometryBarrier.wait();

		//
		// Pixel phase
		//
#if WIREFRAME
		if (Core::currentStrandId() == 0)
		{
			// Only thread 0 does wireframes
			for (int tileY = 0; tileY < kFbHeight; tileY += kTileSize)
			{
				for (int tileX = 0; tileX < kFbWidth; tileX += kTileSize)
					renderTarget.getColorBuffer()->clearTile(tileX, tileY, 0);
			}

			// Draw the three edges of every triangle.
			for (int vidx = 0; vidx < numIndices; vidx += 3)
			{
				int offset0 = indices[vidx] * numVertexParams;
				int offset1 = indices[vidx + 1] * numVertexParams;
				int offset2 = indices[vidx + 2] * numVertexParams;

				float x0 = gVertexParams[offset0 + kParamX];
				float y0 = gVertexParams[offset0 + kParamY];
				float x1 = gVertexParams[offset1 + kParamX];
				float y1 = gVertexParams[offset1 + kParamY];
				float x2 = gVertexParams[offset2 + kParamX];
				float y2 = gVertexParams[offset2 + kParamY];

				// Convert screen space coordinates to raster coordinates
				int x0Rast = x0 * kFbWidth / 2 + kFbWidth / 2;
				int y0Rast = y0 * kFbHeight / 2 + kFbHeight / 2;
				int x1Rast = x1 * kFbWidth / 2 + kFbWidth / 2;
				int y1Rast = y1 * kFbHeight / 2 + kFbHeight / 2;
				int x2Rast = x2 * kFbWidth / 2 + kFbWidth / 2;
				int y2Rast = y2 * kFbHeight / 2 + kFbHeight / 2;

				drawLine(&gColorBuffer, x0Rast, y0Rast, x1Rast, y1Rast, 0xffffffff);
				drawLine(&gColorBuffer, x1Rast, y1Rast, x2Rast, y2Rast, 0xffffffff);
				drawLine(&gColorBuffer, x2Rast, y2Rast, x0Rast, y0Rast, 0xffffffff);
			}

			for (int tileY = 0; tileY < kFbHeight; tileY += kTileSize)
			{
				for (int tileX = 0; tileX < kFbWidth; tileX += kTileSize)
					renderTarget.getColorBuffer()->flushTile(tileX, tileY);
			}
		}
#else // #if WIREFRAME
		// Tiles are handed out dynamically through the shared counter
		// gNextTileIndex.
		while (gNextTileIndex < kMaxTileIndex)
		{
			// Grab the next available tile to begin working on.
			// The index is re-checked because another strand may have taken
			// the last tile between the loop test and the atomic increment.
			int myTileIndex = __sync_fetch_and_add(&gNextTileIndex, 1);
			if (myTileIndex >= kMaxTileIndex)
				break;

			int tileX = (myTileIndex % kTilesPerRow) * kTileSize;
			int tileY = (myTileIndex / kTilesPerRow) * kTileSize;

			renderTarget.getColorBuffer()->clearTile(tileX, tileY, 0);
			if (pixelShader.isZBufferEnabled())
			{
				// XXX Ideally, we'd initialize to infinity, but comparisons
				// with infinity are broken in hardware.  For now, initialize
				// to a very large number
				renderTarget.getZBuffer()->clearTile(tileX, tileY, 0x7e000000);
			}

			// Cycle through all triangles and attempt to render into this
			// NxN tile.
			for (int vidx = 0; vidx < numIndices; vidx += 3)
			{
				// Each vertex owns numVertexParams consecutive floats in
				// gVertexParams.
				int offset0 = indices[vidx] * numVertexParams;
				int offset1 = indices[vidx + 1] * numVertexParams;
				int offset2 = indices[vidx + 2] * numVertexParams;

				float x0 = gVertexParams[offset0 + kParamX];
				float y0 = gVertexParams[offset0 + kParamY];
				float z0 = gVertexParams[offset0 + kParamZ];
				float x1 = gVertexParams[offset1 + kParamX];
				float y1 = gVertexParams[offset1 + kParamY];
				float z1 = gVertexParams[offset1 + kParamZ];
				float x2 = gVertexParams[offset2 + kParamX];
				float y2 = gVertexParams[offset2 + kParamY];
				float z2 = gVertexParams[offset2 + kParamZ];

				// Convert screen space coordinates to raster coordinates
				int x0Rast = x0 * kFbWidth / 2 + kFbWidth / 2;
				int y0Rast = y0 * kFbHeight / 2 + kFbHeight / 2;
				int x1Rast = x1 * kFbWidth / 2 + kFbWidth / 2;
				int y1Rast = y1 * kFbHeight / 2 + kFbHeight / 2;
				int x2Rast = x2 * kFbWidth / 2 + kFbWidth / 2;
				int y2Rast = y2 * kFbHeight / 2 + kFbHeight / 2;

#if ENABLE_BOUNDING_BOX_CHECK
				// Bounding box check.  If triangles are not within this tile,
				// skip them.
				int xMax = tileX + kTileSize;
				int yMax = tileY + kTileSize;
				if ((x0Rast < tileX && x1Rast < tileX && x2Rast < tileX)
					|| (y0Rast < tileY && y1Rast < tileY && y2Rast < tileY)
					|| (x0Rast > xMax && x1Rast > xMax && x2Rast > xMax)
					|| (y0Rast > yMax && y1Rast > yMax && y2Rast > yMax))
					continue;
#endif

#if ENABLE_BACKFACE_CULL
				// Backface cull triangles that are facing away from camera.
				// We also remove triangles that are edge on here, since they
				// won't be rasterized correctly.
				if ((x1Rast - x0Rast) * (y2Rast - y0Rast) - (y1Rast - y0Rast)
					* (x2Rast - x0Rast) <= 0)
					continue;
#endif

				// Set up parameters and rasterize triangle.
				pixelShader.setUpTriangle(x0, y0, z0, x1, y1, z1, x2, y2, z2);
				// NOTE(review): paramI runs over [0, numVertexParams) but the
				// reads are at index paramI + 4, so the last four iterations
				// read past this vertex's numVertexParams-float record --
				// confirm whether the bound should be numVertexParams - 4.
				for (int paramI = 0; paramI < numVertexParams; paramI++)
				{
					pixelShader.setUpParam(paramI,
						gVertexParams[offset0 + paramI + 4],
						gVertexParams[offset1 + paramI + 4],
						gVertexParams[offset2 + paramI + 4]);
				}

				rasterizer.fillTriangle(&pixelShader, tileX, tileY,
					x0Rast, y0Rast, x1Rast, y1Rast, x2Rast, y2Rast);
			}

			renderTarget.getColorBuffer()->flushTile(tileX, tileY);
		}
#endif // #if WIREFRAME
		// Every strand waits here once it has no more tiles to render.
		gPixelBarrier.wait();
	}

	return 0;
}
/* Callback routine for synchronization of C++11 threads. */ void cpp11thread_sync(int tid, int tsize, void *data) { Barrier *barrier = reinterpret_cast<Barrier*>(data); barrier->wait(); }
/* svc() will execute in each thread & do a few things with the Barrier we have. */ int Test::svc(void) { // Say hello to everyone first. ACE_DEBUG(( LM_INFO, "(%P|%t|%T) Created\n" )); // Increment and save the "tcount" value. We'll use it in // just a moment... int me = ++tcount_; // Wait for all initial threads to get to this point before we // go any further. This is standard barrier usage... barrier_.wait(); // Setup our random number generator. ACE_Time_Value now(ACE_OS::gettimeofday()); ACE_RANDR_TYPE seed = now.usec(); ACE_OS::srand(seed); int delay; // We'll arbitrarily choose the first activated thread to be // the controller. After it sleeps a few seconds, it will add // five threads. if( me == 1 ) { // Sleep from 1 to 10 seconds so that some of the other // threads will be into their for() loop. delay = ACE_OS::rand_r(seed)%10; ACE_OS::sleep(abs(delay)+1); // Make ourselves the barrier owner so that we can change // the number of threads. This should be done with care... barrier_.owner( ACE_OS::thr_self() ); // Add 5 threads to the barrier and then activate() to // make them real. Notice the third parameter to // activate(). Without this parameter, the threads won't // be created. if( barrier_.threads(threads_+5) == 0 ) { this->activate(THR_NEW_LWP,5,1); } } // This for() loop represents an "infinite" work loop in an // application. The theory is that the threads are dividing up // some work but need to "recalibrate" if more threads are // added. I'll just do five iterations so that the test // doesn't run forever. int i; for( i = 0 ; i < 5 ; ++i ) { // The sleep() represents time doing work. delay = ACE_OS::rand_r(seed)%7; ACE_OS::sleep(abs(delay)+1); ACE_DEBUG(( LM_INFO, "(%P|%t|%T)\tThread %.2d of %.2d iteration %.2d\n", me, threads_, i )); // If the local threads_ variable doesn't match the number // in the barrier, then the controller must have changed // the thread count. 
We'll wait() for everyone and then // recalibrate ourselves before continuing. if( this->threads_ != barrier_.threads() ) { ACE_DEBUG(( LM_INFO, "(%P|%t|%T) Waiting for thread count to increase to %d from %d\n", barrier_.threads(), this->threads_ )); // Wait for all our sibling threads... barrier_.wait(); // Set our local variable so that we don't come here again. this->threads_ = barrier_.threads(); // Recalibration can be anything you want. At this // point, we know that all of the threads are synch'd // and ready to go. } } // Re-synch all of the threads before they exit. This isn't // really necessary but I like to do it. barrier_.done(); return(0); }