Example #1
0
 /// Converts the calling fiber back into a regular thread when the guard
 /// object is destroyed.
 ///
 /// The outcome of ConvertFiberToThread() is only verified through
 /// HPX_ASSERT; no error is reported to the caller.
 ~prepare_main_thread() noexcept
 {
     BOOL const converted = ConvertFiberToThread();
     HPX_ASSERT(converted != FALSE);
     // Presumably keeps `converted` referenced when HPX_ASSERT compiles to
     // nothing, avoiding an unused-variable warning.
     HPX_UNUSED(converted);
 }
Example #2
0
	/// Converts the calling fiber back into a regular thread.
	///
	/// The BOOL result of ConvertFiberToThread() is discarded, so a failed
	/// conversion is not reported to the caller.
	void CoCleanup()
	{
		static_cast<void>(ConvertFiberToThread());
	}
Example #3
0
int 
clemuGPU :: scheduleNextJob(void)
{
int ret = CL_EMU_SUCCESS;
// TO DO : SELECT JOB
// SELECT SE, SIMD
clemuKernelJob *nextJob = m_job[0];
int cur_gbl_tid;
clemuGPU* dev = (clemuGPU*)(nextJob->GetDev());

   nextJob->m_next_active_group = 0;
   nextJob->m_fiber.m_FIBER_id = ConvertThreadToFiberEx(nextJob,FIBER_FLAG_FLOAT_SWITCH);
   nextJob->m_fiber.m_FLS_index = 0;

// TO DO: NBR GROUPS > SIMDS
// run
   for(int j = 0, grp_id = 0, wf_id = 0; j < nextJob->m_nbr_grpDim[1]; j++)
   {
      for(int i = 0; i < nextJob->m_nbr_grpDim[0]; i++, grp_id++, wf_id += nextJob->m_nbr_wavefronts)
	  {
int active_grp = nextJob->m_next_active_group;
// activate

		  if ( !nextJob->m_groups[active_grp].m_fiber.m_FIBER_id )
		  {
		       nextJob->m_groups[active_grp].m_fiber.m_FIBER_id = 
			               CreateFiberEx( CLEMU_STACKCOMMITSIZE,
                                           CLEMU_STACKRESERVESIZE,
										   FIBER_FLAG_FLOAT_SWITCH,
										   clemu_groupthread_proc,
										   &nextJob->m_groups[active_grp]);

			   assert(nextJob->m_groups[active_grp].m_fiber.m_FIBER_id);
		       nextJob->m_groups[active_grp].m_fiber.m_FLS_index = 0;
		  }

		  if ( !nextJob->m_groups[active_grp].m_td_fibers )
		  {
	           nextJob->m_groups[active_grp].m_td_fibers = new CL_EMU_FIBER[nextJob->GetNbrInstances()];
		       assert(nextJob->m_groups[active_grp].m_td_fibers);
        	   memset(nextJob->m_groups[active_grp].m_td_fibers, 0, sizeof(CL_EMU_FIBER) * nextJob->GetNbrInstances()); 
		  }

		  for(int l = 0, tid = 0; l < nextJob->GetNbrWavefronts(); l++)
		  {
			  int wf_sz = nextJob->GetRealWFSz(l);
			  for(int m = 0; m < wf_sz; m++, tid++)
			  {
				  if ( !nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id )
				  {
                     nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id = 
						    CreateFiberEx( CLEMU_STACKCOMMITSIZE,
                                           CLEMU_STACKRESERVESIZE,
										   FIBER_FLAG_FLOAT_SWITCH,
						                   clemu_wavefrontthread_proc,
										   &nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m]);

					 assert(nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id);

				     nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FLS_index = 0; //FlsAlloc(0);
				     nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_group_ptr = &nextJob->m_groups[active_grp];
				  }

			  }
		  }



          nextJob->m_groups[active_grp].m_parent = nextJob;
		  nextJob->m_groups[active_grp].m_grpCoord[0] = i;
		  nextJob->m_groups[active_grp].m_grpCoord[1] = j;
		  nextJob->m_groups[active_grp].m_grpCoord[2] = 0;
		  nextJob->m_groups[active_grp].m_group_id = grp_id;
		  nextJob->m_groups[active_grp].m_se_id = 0;
		  nextJob->m_groups[active_grp].m_simd_id = active_grp % dev->GetNbrSIMD();
	      nextJob->m_groups[active_grp].m_first_hwwavefront = wf_id;

		  cur_gbl_tid = grp_id * nextJob->m_actual_nbr_instances;
          nextJob->m_groups[active_grp].m_kernel = nextJob->GetKernel();
		  nextJob->m_groups[active_grp].m_endof_group = 0;
// local memory setup
// EXPLANATION:
// The size comes either from invokation (argument) or from teh kernel declaration.
// if it comes from declaration for teh first group there will be 0 sz which is max size.
// second group will det the correect size and will be separated from teh first group
int local_mem_sz = (nextJob->m_localmem_sz > clemuGetCompiledKernelMemSz(nextJob->m_groups[active_grp].m_kernel)) ? nextJob->m_localmem_sz : clemuGetCompiledKernelMemSz(nextJob->m_groups[active_grp].m_kernel);
		  nextJob->m_groups[active_grp].m_localmem_sz = local_mem_sz;
		  nextJob->m_groups[active_grp].m_localmem_ptr = (unsigned char*)dev->AssignLclMem(nextJob->m_groups[active_grp].m_simd_id,active_grp,local_mem_sz); 
		  if ( nextJob->m_groups[active_grp].m_nmb_arg > 0 )
          {
		       for( int l = 0; l < nextJob->m_groups[active_grp].m_nmb_arg; l++)
		       {
		           if ( (nextJob->m_groups[active_grp].m_args[l].m_flags & CL_ARG_LCL_MEM_SZ) == CL_ARG_LCL_MEM_SZ )
		           {
					   nextJob->m_groups[active_grp].m_args[l].m_flags |= CL_ARG_LDS_PTR;
					   nextJob->m_groups[active_grp].m_args[l].m_arg.parg = nextJob->m_groups[active_grp].m_localmem_ptr;
					   break;
		           }
		       }
          }

//		  nextJob->m_groups[grp_id].m_wf_fibers = new CL_EMU_FIBER[nextJob->m_nbr_wavefronts];
//		  assert(nextJob->m_groups[grp_id].m_wf_fibers);
		  if ( !nextJob->m_groups[active_grp].m_td_fibers )
		  {
	           nextJob->m_groups[active_grp].m_td_fibers = new CL_EMU_FIBER[nextJob->m_actual_nbr_instances];
		       assert(nextJob->m_groups[active_grp].m_td_fibers);
		  }


		  for(int l = 0, tid = 0; l < nextJob->m_nbr_wavefronts; l++)
		  {
			  int wf_sz = nextJob->GetRealWFSz(l);
			  for(int m = 0; m < wf_sz; m++, tid++)
			  {
                  nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_grp_tid = l * wf_sz + m;
                  nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_gbl_tid = cur_gbl_tid + nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_grp_tid;
                  nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_wfid = l;
                  nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_wf_tid = m;
			  }
		  }

// schedule group
          nextJob->m_curgroup = active_grp;

		  SwitchToFiber(nextJob->m_groups[active_grp].m_fiber.m_FIBER_id);
// free

		  if ( nextJob->m_groups[active_grp].m_td_fibers )
		  {
		       for(int l = 0, tid = 0; l < nextJob->GetNbrWavefronts(); l++)
		       {
			  int wf_sz = nextJob->GetRealWFSz(l);
			       for(int m = 0; m < wf_sz; m++, tid++)
			       {
				       if ( nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id )
				       {
                           DeleteFiber(nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id);
					       nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id = 0;
				        }

			        }
		       }
	           delete [] nextJob->m_groups[active_grp].m_td_fibers;
		       nextJob->m_groups[active_grp].m_td_fibers = 0;
		  }
		  if ( nextJob->m_groups[active_grp].m_fiber.m_FIBER_id )
		  {
		      DeleteFiber(nextJob->m_groups[active_grp].m_fiber.m_FIBER_id);
		      nextJob->m_groups[active_grp].m_fiber.m_FIBER_id = 0;
		  }

          nextJob->m_next_active_group = (active_grp + 1) %  nextJob->m_nbr_active_group;
	  }
   }

   nextJob->FreeJob();

   ConvertFiberToThread();
   nextJob->m_fiber.m_FIBER_id = 0;

   return(ret);

}