Esempio n. 1
0
  /**
   * Per-thread worker computing the mean and deviation of a partitioned range.
   *
   * Each thread processes its own sub-range [first, last) and publishes its
   * partial result in local_results[thread_index]. The thread with index
   * num_threads - 1 combines the partial results between barrier waits.
   *
   * Note: combining by averaging the per-thread values weights every thread
   * equally, independent of how many elements its sub-range holds.
   *
   * @param first         start of this thread's sub-range
   * @param last          end of this thread's sub-range
   * @param thread_index  slot in local_results written by this thread
   */
  void do_work(const Iter first, const Iter last, size_t thread_index) {
    using namespace std;

    // Phase 1: publish the local mean, then wait until every thread has done so.
    auto mean = calc_mean(first, last);
    local_results[thread_index] = mean;
    b.wait();
    if (thread_index == num_threads - 1) {
      cout << "local means: ";
      print_all(local_results.begin(), local_results.end());
      // The initial value fixes the accumulator type of std::accumulate.
      // Seeding with the int literal 0 would truncate every partial sum and
      // perform an integer division, so seed with 0.0 instead.
      global_mean = std::accumulate(local_results.begin(), local_results.end(), 0.0) /
                    num_threads;
      cout << "global mean: " << global_mean << endl;
      // TODO: local mean of this (master) thread
    }
    // Nobody may read global_mean before the combining thread has written it.
    b.wait();

    // Phase 2: local variance around the global mean, reusing the result slots.
    const auto variance = calc_variance(first, last, global_mean);
    local_results[thread_index] = variance;
    b.wait();

    if (thread_index == num_threads - 1) {
      cout << "local variance: ";
      print_all(local_results.begin(), local_results.end());
      cout << endl;
      // Same accumulator-type consideration as for the mean above.
      global_deviation =
          sqrt(std::accumulate(local_results.begin(), local_results.end(), 0.0) / num_threads);
    }
  }
Esempio n. 2
0
	/// Task body: optionally checks the execution order against a shared
	/// counter, sleeps, waits on a barrier and finally forwards the
	/// pause/resubmit flags to the task context.
	/// @param ctx Context that receives m_pause and m_resubmit.
	/// @return ErrorCode::FUNCTION_FAILED when the observed execution order
	///         differs from m_id, ErrorCode::NONE otherwise.
	Error operator()(AsyncLoaderTaskContext& ctx)
	{
		if(m_count)
		{
			// The counter is bumped even when no order check is requested.
			const auto ticket = m_count->fetchAdd(1);

			// A negative m_id disables the order check.
			if(m_id >= 0 && m_id != static_cast<I32>(ticket))
			{
				ANKI_LOGE("Wrong excecution order");
				return ErrorCode::FUNCTION_FAILED;
			}
		}

		if(m_sleepTime != 0.0)
		{
			HighRezTimer::sleep(m_sleepTime);
		}

		if(m_barrier)
		{
			m_barrier->wait();
		}

		ctx.m_pause = m_pause;
		ctx.m_resubmitTask = m_resubmit;

		// The resubmit request is cleared after it has been handed over once.
		m_resubmit = false;

		return ErrorCode::NONE;
	}
Esempio n. 3
0
/** Code to execute in the thread.
 * Executes loop() in each cycle. This is the default implementation and if
 * you need a more specific behaviour you can override this run() method and
 * ignore loop().
 * Although this method is declared virtual, it should not be overridden, other
 * than with the following trivial snippet:
 * @code
 * protected: virtual void run() { Thread::run(); }
 * @endcode
 * The reason not to make other changes is that it contains complex
 * housekeeping code that the system relies on. The reason for still allowing
 * the override is solely to make reading back traces in your debugger easier:
 * with the override the class name of the thread sub-class appears in the
 * back trace, while it would not otherwise.
 *
 * In OPMODE_WAITFORWAKEUP the method blocks before the first cycle and again
 * after every cycle until __pending_wakeups is non-zero, consuming one pending
 * wakeup each time.
 */
void
Thread::run()
{
  if ( __op_mode == OPMODE_WAITFORWAKEUP ) {
    // Wait for initial wakeup
    // __sleep_mutex has been locked in entry() already!
    // The while loop re-checks the counter after every return from wait(), so
    // a return without a pending wakeup simply waits again.
    while (__pending_wakeups == 0) {
      __waiting_for_wakeup = true;
      __sleep_condition->wait();
    }
    __pending_wakeups -= 1;
    __sleep_mutex->unlock();
  }

  // NOTE(review): `forever` is not a C++ keyword; presumably a project macro
  // for an endless loop -- confirm.
  forever {

    // NOTE(review): presumably gives a thread that wants to interrupt the loop
    // a chance to run before loop_mutex is taken again -- confirm against the
    // semantics of stopby().
    loopinterrupt_antistarve_mutex->stopby();

    // loop_mutex is held for the whole duration of loop(); the user loop is
    // skipped while finalize_prepared is set.
    loop_mutex->lock();
    if ( ! finalize_prepared ) {
      __loop_done = false;
      loop();
    }
    loop_mutex->unlock();

    // Mark this cycle as done and wake everybody waiting on that condition.
    __loop_done_mutex->lock();
    __loop_done = true;
    __loop_done_mutex->unlock();
    __loop_done_waitcond->wake_all();

    test_cancel();
    if ( __op_mode == OPMODE_WAITFORWAKEUP ) {
      // NOTE(review): __barrier is tested here before __sleep_mutex is
      // acquired; confirm that this unlocked read cannot race with the writer.
      if ( __barrier ) {
	// Take over the barrier pointer and clear the member under the mutex,
	// then wait on the barrier without holding __sleep_mutex.
	__sleep_mutex->lock();
        Barrier *b = __barrier;
        __barrier = NULL;
	__sleep_mutex->unlock();

	b->wait();

	__sleep_mutex->lock();
      } else {
	__sleep_mutex->lock();
      }

      // Both branches above leave __sleep_mutex locked. Block until the next
      // wakeup has been registered and consume it.
      while (__pending_wakeups == 0) {
	__waiting_for_wakeup = true;
	__sleep_condition->wait();
      }
      __pending_wakeups -= 1;
      __sleep_mutex->unlock();
    }
    yield();
  }
}
 /*
  * Entry point used by the "fake" BpGraphicBufferProducer. The request
  * (code, data and reply Parcel) is stored in this object and then handled
  * either inline or, via the looper, by the thread that services
  * MSG_API_CALL messages; in the latter case the caller blocks on the
  * barrier until it is released.
  */
 virtual status_t transact(uint32_t code,
         const Parcel& data, Parcel* reply, uint32_t flags) {
     // Stash the request where handleMessage() can pick it up.
     this->code = code;
     this->data = &data;
     this->reply = reply;
     android_atomic_acquire_store(0, &memoryBarrier);

     if (exitPending) {
         // Already exited: run the message synchronously right here.
         handleMessage(Message(MSG_API_CALL));
         return NO_ERROR;
     }

     // The barrier has to be closed before the message is posted, otherwise
     // the wait below could miss the release.
     barrier.close();
     looper->sendMessage(this, Message(MSG_API_CALL));
     barrier.wait();
     return NO_ERROR;
 }
Esempio n. 5
0
	/// Task body: performs one allocate/deallocate round trip of 10 units on
	/// m_alloc with a short sleep in between, then waits on the optional
	/// barrier.
	/// @param ctx Task context (not used by this task).
	/// @return ErrorCode::FUNCTION_FAILED if the allocation fails,
	///         ErrorCode::NONE otherwise.
	Error operator()(AsyncLoaderTaskContext& ctx)
	{
		void* block = m_alloc.allocate(10);
		if(block == nullptr)
		{
			return ErrorCode::FUNCTION_FAILED;
		}

		// Keep the allocation alive for a moment before releasing it.
		HighRezTimer::sleep(0.1);
		m_alloc.deallocate(block, 10);

		if(m_barrier)
		{
			m_barrier->wait();
		}

		return ErrorCode::NONE;
	}
Esempio n. 6
0
    /**
     * Called by a worker thread when it has finished its share of a task.
     *
     * The caller that brings the done-counter up to m_totalThreads notifies
     * the task, resets the counter, pops the task from the queue and signals
     * m_tasksComplete if the queue became empty. Every caller then waits on
     * m_barrier.
     *
     * @param task the task being retired; notified by the last finishing thread
     */
    void retireTask(Task* task) {
        {
            // Counter bookkeeping happens under m_threadCounterMutex; the
            // guard is released at the end of this scope, before the barrier.
            std::lock_guard<std::mutex> counterGuard(m_threadCounterMutex);
            ++m_threadsDoneCount;

            const bool allThreadsDone = m_threadsDoneCount >= m_totalThreads;
            if (allThreadsDone) {
                task->notifyComplete();
                m_threadsDoneCount = 0;

                // The queue mutex is taken while the counter mutex is held.
                std::lock_guard<std::mutex> queueGuard(m_queueMutex);
                m_tasks.pop();

                if (m_tasks.empty()) {
                    m_tasksComplete.notify_all();
                }
            }
        }

        m_barrier.wait();
    }
Esempio n. 7
0
//
// All hardware threads start execution here.
//
// Every thread builds its own rasterizer, render target wrapper and shader
// pair, selected at compile time by DRAW_TORUS / DRAW_CUBE / DRAW_TEAPOT and
// GOURAND_SHADER. Strand 0 additionally allocates the shared gVertexParams
// buffer. After gInitBarrier the threads run a single frame consisting of a
// geometry phase (ends at gGeometryBarrier) and a pixel phase (ends at
// gPixelBarrier).
//
int main()
{
	Fiber::initSelf();

	// Disabled fiber scheduling test.
#if 0
	Core::current()->addFiber(new TestFiber('0' + Core::currentStrandId()));
	while (true)
		Core::reschedule();
#endif

	render::Rasterizer rasterizer(kFbWidth, kFbHeight);
	render::RenderTarget renderTarget;
	renderTarget.setColorBuffer(&gColorBuffer);
	renderTarget.setZBuffer(&gZBuffer);

	// Model selection: each branch declares vertexShader, pixelShader and the
	// vertices/numVertices/indices/numIndices locals used below. If none of the
	// DRAW_* macros is set these names are not declared.
#if DRAW_TORUS
#if GOURAND_SHADER
	GourandVertexShader vertexShader;
	GourandPixelShader pixelShader(&renderTarget);
#else
	PhongVertexShader vertexShader;
	PhongPixelShader pixelShader(&renderTarget);
#endif

	const float *vertices = kTorusVertices;
	int numVertices = kNumTorusVertices;
	const int *indices = kTorusIndices;
	int numIndices = kNumTorusIndices;
#elif DRAW_CUBE
	TextureVertexShader vertexShader;
	TexturePixelShader pixelShader(&renderTarget);
	pixelShader.bindTexture(&texture);
	const float *vertices = kCubeVertices;	
	int numVertices = kNumCubeVertices;
	const int *indices = kCubeIndices;
	int numIndices = kNumCubeIndices;
#elif DRAW_TEAPOT
#if GOURAND_SHADER
	GourandVertexShader vertexShader;
	GourandPixelShader pixelShader(&renderTarget);
#else
	PhongVertexShader vertexShader;
	PhongPixelShader pixelShader(&renderTarget);
#endif

	const float *vertices = kTeapotVertices;
	int numVertices = kNumTeapotVertices;
	const int *indices = kTeapotIndices;
	int numIndices = kNumTeapotIndices;
#endif

	// Projection matrix: x is scaled by the inverse aspect ratio; the third and
	// fourth rows are identical.
	// NOTE(review): presumably the fourth row copies z into w for the
	// perspective divide -- confirm against Matrix and the vertex shaders.
	const float kAspectRatio = float(kFbWidth) / float(kFbHeight);
	const float kProjCoeff[4][4] = {
		{ 1.0f / kAspectRatio, 0.0, 0.0, 0.0 },
		{ 0.0, 1.0, 0.0, 0.0 },
		{ 0.0, 0.0, 1.0, 0.0 },
		{ 0.0, 0.0, 1.0, 0.0 },
	};

	vertexShader.setProjectionMatrix(Matrix(kProjCoeff));

	// Initial placement of the selected model.
#if DRAW_TORUS
	vertexShader.applyTransform(translate(0.0f, 0.0f, 1.5f));
	vertexShader.applyTransform(rotateAboutAxis(M_PI / 3.5, 0.707f, 0.707f, 0.0f));
#elif DRAW_CUBE
	vertexShader.applyTransform(translate(0.0f, 0.0f, 2.0f));
	vertexShader.applyTransform(rotateAboutAxis(M_PI / 3.5, 0.707f, 0.707f, 0.0f));
#elif DRAW_TEAPOT
	vertexShader.applyTransform(translate(0.0f, 0.1f, 0.25f));
	vertexShader.applyTransform(rotateAboutAxis(M_PI, -1.0f, 0.0f, 0.0f));
#endif

	// Rotation applied to the vertex shader once per frame.
	Matrix rotateStepMatrix(rotateAboutAxis(M_PI / 8, 0.707f, 0.707f, 0.0f));
	
	pixelShader.enableZBuffer(true);
//	pixelShader.enableBlend(true);

	// Only strand 0 allocates the shared vertex parameter buffer; the barrier
	// below keeps the other threads from proceeding before it has arrived.
	if (Core::currentStrandId() == 0)
		gVertexParams = (float*) allocMem(16384 * sizeof(float));

	gInitBarrier.wait();

	int numVertexParams = vertexShader.getNumParams();

	// The loop bound renders exactly one frame.
	for (int frame = 0; frame < 1; frame++)
	{
		//
		// Geometry phase.  Statically assign groups of 16 vertices to threads. Although these may be 
		// handled in arbitrary order, they are put into gVertexParams in proper order (this is a sort
		// middle architecture, and gVertexParams is in the middle).
		//
		// NOTE(review): the count passed to processVertices() is the number of
		// remaining vertices, not 16; presumably processVertices() handles at
		// most 16 per call -- confirm.
		//
		int vertexIndex = Core::currentStrandId() * 16;
		while (vertexIndex < numVertices)
		{
			vertexShader.processVertices(gVertexParams + vertexShader.getNumParams() * vertexIndex, 
				vertices + vertexShader.getNumAttribs() * vertexIndex, numVertices - vertexIndex);
			vertexIndex += 16 * kNumCores * kHardwareThreadsPerCore;
		}

		// Strand 0 resets the shared tile counter before the pixel phase.
		if (Core::currentStrandId() == 0)
			gNextTileIndex = 0;

		vertexShader.applyTransform(rotateStepMatrix);
		gGeometryBarrier.wait();

		//
		// Pixel phase
		//

#if WIREFRAME
		if (Core::currentStrandId() == 0)
		{
			// Only thread 0 does wireframes

			// Clear every tile of the color buffer.
			for (int tileY = 0; tileY < kFbHeight; tileY += kTileSize)
			{
				for (int tileX = 0; tileX < kFbWidth; tileX += kTileSize)
					renderTarget.getColorBuffer()->clearTile(tileX, tileY, 0);
			}

			// Draw the three edges of every triangle.
			for (int vidx = 0; vidx < numIndices; vidx += 3)
			{
				int offset0 = indices[vidx] * numVertexParams;
				int offset1 = indices[vidx + 1] * numVertexParams;
				int offset2 = indices[vidx + 2] * numVertexParams;
			
				float x0 = gVertexParams[offset0 + kParamX];
				float y0 = gVertexParams[offset0 + kParamY];
				float x1 = gVertexParams[offset1 + kParamX];
				float y1 = gVertexParams[offset1 + kParamY];
				float x2 = gVertexParams[offset2 + kParamX];
				float y2 = gVertexParams[offset2 + kParamY];

				// Convert screen space coordinates to raster coordinates
				int x0Rast = x0 * kFbWidth / 2 + kFbWidth / 2;
				int y0Rast = y0 * kFbHeight / 2 + kFbHeight / 2;
				int x1Rast = x1 * kFbWidth / 2 + kFbWidth / 2;
				int y1Rast = y1 * kFbHeight / 2 + kFbHeight / 2;
				int x2Rast = x2 * kFbWidth / 2 + kFbWidth / 2;
				int y2Rast = y2 * kFbHeight / 2 + kFbHeight / 2;

				drawLine(&gColorBuffer, x0Rast, y0Rast, x1Rast, y1Rast, 0xffffffff);
				drawLine(&gColorBuffer, x1Rast, y1Rast, x2Rast, y2Rast, 0xffffffff);
				drawLine(&gColorBuffer, x2Rast, y2Rast, x0Rast, y0Rast, 0xffffffff);
			}
			
			// Flush every tile of the color buffer.
			for (int tileY = 0; tileY < kFbHeight; tileY += kTileSize)
			{
				for (int tileX = 0; tileX < kFbWidth; tileX += kTileSize)
					renderTarget.getColorBuffer()->flushTile(tileX, tileY);
			}
		}
		
#else // #if WIREFRAME
		// Tiles are handed out dynamically through the shared gNextTileIndex
		// counter. The loop condition is only a plain read; the value returned by
		// the atomic fetch-and-add is checked again below.
		while (gNextTileIndex < kMaxTileIndex)
		{
			// Grab the next available tile to begin working on.
			int myTileIndex = __sync_fetch_and_add(&gNextTileIndex, 1);
			if (myTileIndex >= kMaxTileIndex)
				break;

			int tileX = (myTileIndex % kTilesPerRow) * kTileSize;
			int tileY = (myTileIndex / kTilesPerRow) * kTileSize;

			renderTarget.getColorBuffer()->clearTile(tileX, tileY, 0);
			if (pixelShader.isZBufferEnabled())
			{
				// XXX Ideally, we'd initialize to infinity, but comparisons
				// with infinity are broken in hardware.  For now, initialize
				// to a very large number
				renderTarget.getZBuffer()->clearTile(tileX, tileY, 0x7e000000);
			}

			// Cycle through all triangles and attempt to render into this 
			// NxN tile.
			for (int vidx = 0; vidx < numIndices; vidx += 3)
			{
				int offset0 = indices[vidx] * numVertexParams;
				int offset1 = indices[vidx + 1] * numVertexParams;
				int offset2 = indices[vidx + 2] * numVertexParams;
			
				float x0 = gVertexParams[offset0 + kParamX];
				float y0 = gVertexParams[offset0 + kParamY];
				float z0 = gVertexParams[offset0 + kParamZ];
				float x1 = gVertexParams[offset1 + kParamX];
				float y1 = gVertexParams[offset1 + kParamY];
				float z1 = gVertexParams[offset1 + kParamZ];
				float x2 = gVertexParams[offset2 + kParamX];
				float y2 = gVertexParams[offset2 + kParamY];
				float z2 = gVertexParams[offset2 + kParamZ];

				// Convert screen space coordinates to raster coordinates
				int x0Rast = x0 * kFbWidth / 2 + kFbWidth / 2;
				int y0Rast = y0 * kFbHeight / 2 + kFbHeight / 2;
				int x1Rast = x1 * kFbWidth / 2 + kFbWidth / 2;
				int y1Rast = y1 * kFbHeight / 2 + kFbHeight / 2;
				int x2Rast = x2 * kFbWidth / 2 + kFbWidth / 2;
				int y2Rast = y2 * kFbHeight / 2 + kFbHeight / 2;

#if ENABLE_BOUNDING_BOX_CHECK
				// Bounding box check.  If triangles are not within this tile,
				// skip them.
				int xMax = tileX + kTileSize;
				int yMax = tileY + kTileSize;
				if ((x0Rast < tileX && x1Rast < tileX && x2Rast < tileX)
					|| (y0Rast < tileY && y1Rast < tileY && y2Rast < tileY)
					|| (x0Rast > xMax && x1Rast > xMax && x2Rast > xMax)
					|| (y0Rast > yMax && y1Rast > yMax && y2Rast > yMax))
					continue;
#endif

#if ENABLE_BACKFACE_CULL
				// Backface cull triangles that are facing away from camera.
				// We also remove triangles that are edge on here, since they
				// won't be rasterized correctly.
				if ((x1Rast - x0Rast) * (y2Rast - y0Rast) - (y1Rast - y0Rast) 
					* (x2Rast - x0Rast) <= 0)
					continue;
#endif

				// Set up parameters and rasterize triangle.
				// NOTE(review): the parameters are read starting 4 floats into
				// each vertex record while the loop still runs numVertexParams
				// times, i.e. up to index numVertexParams + 3 relative to the
				// vertex offset. Confirm that this is intended and stays within
				// gVertexParams.
				pixelShader.setUpTriangle(x0, y0, z0, x1, y1, z1, x2, y2, z2);
				for (int paramI = 0; paramI < numVertexParams; paramI++)
				{
					pixelShader.setUpParam(paramI, 
						gVertexParams[offset0 + paramI + 4],
						gVertexParams[offset1 + paramI + 4], 
						gVertexParams[offset2 + paramI + 4]);
				}

				rasterizer.fillTriangle(&pixelShader, tileX, tileY,
					x0Rast, y0Rast, x1Rast, y1Rast, x2Rast, y2Rast);
			}

			renderTarget.getColorBuffer()->flushTile(tileX, tileY);
		}
#endif	// #if WIREFRAME
		
		// All threads meet here at the end of the frame.
		gPixelBarrier.wait();
	}
	
	return 0;
}
/* Callback routine for synchronization of C++11 threads. */
void cpp11thread_sync(int tid, int tsize, void *data)
{
  Barrier *barrier = reinterpret_cast<Barrier*>(data);

  barrier->wait();
}
/* svc() is the body of every thread in this test. It rendezvouses on the
   barrier, lets the first thread enlarge the thread group by five, runs a
   short simulated work loop that re-synchronizes when the barrier's thread
   count has changed, and finally calls done() on the barrier.
 */
int Test::svc(void)
{
    // Announce the new thread.
    ACE_DEBUG(( LM_INFO, "(%P|%t|%T) Created\n" ));

    // Claim a sequence number; it identifies the controller below and is
    // printed in the per-iteration log line.
    const int me = ++tcount_;

    // Standard barrier usage: nobody continues until all of the initial
    // threads have arrived here.
    barrier_.wait();

    // Seed the random number source from the current time.
    ACE_Time_Value now(ACE_OS::gettimeofday());
    ACE_RANDR_TYPE seed = now.usec();
    ACE_OS::srand(seed);

    // The thread that obtained sequence number 1 acts as the controller.
    if( me == 1 )
    {
        // Sleep 1..10 seconds so that some of the other threads are already
        // inside their work loop.
        const int startupDelay = ACE_OS::rand_r(seed)%10;
        ACE_OS::sleep(abs(startupDelay)+1);

        // Become the barrier owner so that the thread count may be changed.
        // This should be done with care...
        barrier_.owner( ACE_OS::thr_self() );

        // Register five more threads with the barrier and, if that worked,
        // activate() them. The third argument to activate() is required,
        // otherwise the threads are not created.
        if( barrier_.threads(threads_+5) == 0 )
        {
            this->activate(THR_NEW_LWP,5,1);
        }
    }

    // Stand-in for an "infinite" work loop in which the threads share some
    // work and have to recalibrate when more threads join. Limited to five
    // iterations so that the test terminates.
    for( int i = 0 ; i < 5 ; ++i )
    {
        // The sleep() represents time spent working.
        const int workDelay = ACE_OS::rand_r(seed)%7;
        ACE_OS::sleep(abs(workDelay)+1);

        ACE_DEBUG(( LM_INFO, "(%P|%t|%T)\tThread %.2d of %.2d iteration %.2d\n", me, threads_, i ));

        // Nothing to recalibrate while our cached count still matches the
        // barrier's count.
        if( this->threads_ == barrier_.threads() )
        {
            continue;
        }

        // The controller must have changed the thread count.
        ACE_DEBUG(( LM_INFO, "(%P|%t|%T) Waiting for thread count to increase to %d from %d\n",
                    barrier_.threads(), this->threads_ ));

        // Wait for all sibling threads...
        barrier_.wait();

        // ...and remember the new count so that we do not come here again.
        // Any recalibration would go here: at this point all threads are
        // synchronized and ready to go.
        this->threads_ = barrier_.threads();
    }

    // Final call on the barrier before the thread exits. Not strictly
    // necessary.
    barrier_.done();

    return(0);
}