Exemple #1
0
	// Submits one unit of work (callback `worker` plus its opaque argument `arg`)
	// to the pool.
	//
	// If a thread is waiting in m_threadquere, the thread at the front is removed,
	// handed the work and woken. Otherwise the work is appended to m_workquere.
	// NOTE(review): Lock<SpinLock> is presumably a scoped guard that keeps
	// m_lockObject held until this function returns — confirm against its definition.
	void CXtzThreadPool::AddWork( WorkerFuntion worker, void* arg )
	{
		Lock<SpinLock> guard( m_lockObject );

		if ( !m_threadquere.empty() )
		{
			// A thread is available: take it off the queue and dispatch directly.
			CXtzThread* availableThread = m_threadquere.front( );
			m_threadquere.pop_front( );

			WORK pendingWork( worker, arg );
			availableThread->SetWork( pendingWork );
			availableThread->WakeUp( );
			return;
		}

		// No thread is queued: park the work until one picks it up.
		m_workquere.emplace_back( worker, arg );
	}
Exemple #2
0
// Per-thread entry point: sizes and fills this thread's array, initializes one
// sub-array per request, then runs threadWork() on it.
//
// thread_id  must point to an int holding this thread's id.
//
// Returns the value produced by threadWork(), or 0 (after printing a message
// to stderr) when setup cannot be completed because g_num_requests is 0 or
// the array allocation fails.
//
// NOTE(review): this function writes the globals g_num_elements_initial and
// g_num_elements without any visible synchronization — confirm this is safe
// when several threads run threadMain concurrently.
uint32_t threadMain(void* thread_id)
{
   g_num_elements_initial = g_num_elements;
   uint32_t tid = static_cast<uint32_t>(*static_cast<int*>(thread_id));

   // Every size computation below divides by g_num_requests.
   if (g_num_requests == 0)
   {
      fprintf(stderr, "threadMain: g_num_requests is 0, nothing to run (tid %u)\n",
              static_cast<unsigned>(tid));
      return 0;
   }

   // Accesses should hit cache-line addresses, to eliminate spatial locality
   // and force more misses. Therefore each request's share of the array is
   // rounded up to a multiple of the stride (CACHELINE_SZ), and the total
   // array size is recomputed from that, so the property still holds once
   // the array is split into per-request pieces.
   if ((g_num_elements / g_num_requests)  % CACHELINE_SZ != 0 || g_num_elements < g_num_requests)
   {
      uint32_t rounded_per_req = g_num_elements / g_num_requests;
      rounded_per_req = rounded_per_req + ((CACHELINE_SZ) - (rounded_per_req % CACHELINE_SZ));
      g_num_elements = g_num_requests * rounded_per_req;
   }

#ifdef DEBUG
   fprintf(stderr, "adjusted size of array = %d\n", g_num_elements);
#endif

   const size_t num_bytes = static_cast<size_t>(g_num_elements) * sizeof(uint32_t);
   uint32_t* arr_n_ptr = static_cast<uint32_t*>(malloc(num_bytes));
   if (!arr_n_ptr)
   {
      // Without this check the fill loop below would dereference NULL.
      fprintf(stderr, "threadMain: failed to allocate %zu bytes (tid %u)\n",
              num_bytes, static_cast<unsigned>(tid));
      return 0;
   }

   // Fill with a recognizable placeholder before the per-request setup.
   for (uint32_t i = 0; i < g_num_elements; i++)
      arr_n_ptr[i] = 1337;

   uint32_t stride = CACHELINE_SZ;

   // Provide each request its own "array" to pointer chase on. This prevents
   // the processor from consolidating request streams. The fact that we are
   // using a single array to hold all of this is a bit too "clever", but it
   // saves cycles in the critical loop from figuring out which array to use.
   const uint32_t num_elements_per_req = g_num_elements / g_num_requests;
   for (int i = 0; i < g_num_requests; i++)
   {
      initializeGlobalArrays( arr_n_ptr,
                              num_elements_per_req,
                              stride,
                              i * num_elements_per_req);
   }

   // This volatile ret_val is crucial, otherwise the entire run-loop
   // gets optimized away! :(
   uint32_t volatile ret_val = threadWork(tid, arr_n_ptr);

   // NOTE(review): arr_n_ptr is never freed. If threadWork() does not keep the
   // pointer after it returns, a free(arr_n_ptr) belongs here — confirm.
   return ret_val;
}