예제 #1
0
 // Applies both settings in one call: `seed` is handed to SetSeed() first,
 // then `dev` is handed to SetDev(). Nothing is returned.
 void InitializeAll(unsigned seed, double dev)
 {
     SetSeed(seed);
     SetDev(dev);
 }
예제 #2
0
// Prepares this job object for running one kernel on the given device.
//
// In order, the function:
//   1. registers the device through SetDev();
//   2. derives, per dimension, the number of groups (domain extent divided by
//      group size, rounded up), the group size and the rounded-up domain
//      extent, plus the number of work-items and wavefronts per group;
//   3. copies the kernel entry name into m_kernel_name and looks the kernel
//      up with clemuFindCompiledKernel();
//   4. takes a by-value copy of the argument descriptors into m_args;
//   5. allocates the group slots if m_groups is still null;
//   6. points every group slot at this job and the kernel, and copies the
//      argument descriptors into it.
//
// Parameters:
//   _device            - received as void*, used here as a clemuGPU*.
//   _program_location,
//   _program_nm,
//   _kernel_entry_name - forwarded to clemuFindCompiledKernel(); the entry
//                        name is also stored in m_kernel_name.
//   _domainDim         - number of entries read from _domain and _group.
//                        NOTE(review): the per-dimension members are indexed
//                        0..2 below, so values above 3 presumably overrun
//                        them - confirm against the member declarations.
//   _domain            - requested extent per dimension.
//   _group             - group size per dimension; used as a divisor, so a
//                        zero entry would divide by zero.
//   _nmr_arg           - number of entries read from _args.
//   _args              - kernel argument descriptors, copied by value.
//
// Returns CL_EMU_SUCCESS; this function produces no other return value.
// A failed kernel lookup is only caught by assert().
int
clemuKernelJob :: initJob(void *_device,
                          const char * _program_location,
                          const char * _program_nm,
                          const char*_kernel_entry_name,
                          int _domainDim,
                          int _domain[],
                          int _group[],
                          int _nmr_arg,
                          ClKrnlArg _args[])
{
    int ret = CL_EMU_SUCCESS;
    clemuGPU *dev = (clemuGPU*)_device;

    // TO AVOID MEM FAULT
    SetDev(dev);

    // Every dimension starts at 1 so that dimensions beyond _domainDim are
    // neutral. m_nbr_grpDim must be reset as well: all three of its entries
    // are multiplied together below, and without the reset a 1-D or 2-D job
    // would pick up whatever the unused entries held before this call.
    for (int d = 0; d < 3; d++)
    {
        m_nbr_grpDim[d] = 1;
        m_grpDim[d] = 1;
        m_domainDim[d] = 1;
    }

    int actual_nbr_instances = 1;
    for (int i = 0; i < _domainDim; i++)
    {
        // Round up so a partially filled trailing group is still counted.
        m_nbr_grpDim[i] = (_domain[i] + _group[i] - 1) / _group[i];
        m_grpDim[i] = _group[i];
        // Domain extent padded to a whole number of groups.
        m_domainDim[i] = m_nbr_grpDim[i] * _group[i];
        // Work-items per group = product of the group sizes.
        actual_nbr_instances *= _group[i];
    }

    m_nmr_dim = _domainDim;
    m_nbr_groups = m_nbr_grpDim[0] * m_nbr_grpDim[1] * m_nbr_grpDim[2];

    // Wavefronts needed to hold one group, rounded up.
    int nbr_wavefronts = (actual_nbr_instances + dev->GetWFSz() - 1) / dev->GetWFSz();
    m_actual_nbr_instances = actual_nbr_instances;
    m_nbr_wavefronts = nbr_wavefronts;

    strcpy_s(m_kernel_name, _MAX_KERNEL_NAME, _kernel_entry_name);

    // BIND KERNEL
    m_kernel = clemuFindCompiledKernel(_program_location,
                                       _program_nm,
                                       _kernel_entry_name);

    // NOTE(review): m_args is overwritten without releasing an array that an
    // earlier initJob() call may have allocated - confirm it is freed
    // elsewhere before the job is re-initialised.
    m_args = 0;
    m_nmb_arg = _nmr_arg;
    m_localmem_sz = 0;
    if ( m_nmb_arg > 0 )
    {
        m_args = new ClKrnlArg[m_nmb_arg];
        assert(m_args);
        for (int a = 0; a < m_nmb_arg; a++)
        {
            m_args[a] = _args[a];
            // Re-point arg_ptr at the value stored inside this job's own copy
            // rather than at the caller's descriptor.
            m_args[a].arg_ptr = &m_args[a].m_arg.iarg;

            // An argument flagged CL_ARG_LCL_MEM_SZ supplies the local memory
            // size; if several are flagged, the last one wins.
            if ( (m_args[a].m_flags & CL_ARG_LCL_MEM_SZ) == CL_ARG_LCL_MEM_SZ )
            {
                m_localmem_sz = m_args[a].m_len;
            }
        }
    }

    // Size of the final wavefront of a group: the remainder, or a full
    // wavefront when the group size divides evenly.
    m_last_wavesz = actual_nbr_instances % dev->GetWFSz();
    m_last_wavesz = (m_last_wavesz == 0 ) ? dev->GetWFSz() : m_last_wavesz;

    assert(m_kernel);
    m_nbr_active_group = dev->GetNbrSIMD() * dev->GetNbrGroupsPerSIMD();

    // Group slots are created once and reused by later calls.
    // NOTE(review): the array keeps the size computed on the first call; a
    // later call with a larger m_nbr_active_group would index past it in the
    // loop below - confirm the device never changes between calls.
    if ( !m_groups )
    {
        m_groups = new clemuKernelGroup[m_nbr_active_group];
        assert(m_groups);
        memset(m_groups, 0, sizeof(clemuKernelGroup)*m_nbr_active_group);
    }

    for (int j = 0; j < m_nbr_active_group; j++)
    {
        m_groups[j].m_parent = this;
        m_groups[j].m_kernel = m_kernel;
// Fiber creation below is compiled out.
#if 0
        if ( !m_groups[j].m_fiber.m_FIBER_id )
        {
            m_groups[j].m_fiber.m_FIBER_id =
                CreateFiberEx( CLEMU_STACKCOMMITSIZE,
                               CLEMU_STACKRESERVESIZE,
                               FIBER_FLAG_FLOAT_SWITCH,
                               clemu_groupthread_proc,
                               &m_groups[j]);

            assert(m_groups[j].m_fiber.m_FIBER_id);
            m_groups[j].m_fiber.m_FLS_index = 0;
        }

        if ( !m_groups[j].m_td_fibers )
        {
            m_groups[j].m_td_fibers = new CL_EMU_FIBER[m_actual_nbr_instances];
            assert(m_groups[j].m_td_fibers);
            memset(m_groups[j].m_td_fibers, 0, sizeof(CL_EMU_FIBER) * m_actual_nbr_instances);
        }

        for(int l = 0, tid = 0; l < m_nbr_wavefronts; l++)
        {
            int wf_sz = GetRealWFSz(l);
            for(int m = 0; m < wf_sz; m++, tid++)
            {
                if ( !m_groups[j].m_td_fibers[l*wf_sz + m].m_FIBER_id )
                {
                    m_groups[j].m_td_fibers[l*wf_sz + m].m_FIBER_id =
                        CreateFiberEx( CLEMU_STACKCOMMITSIZE,
                                       CLEMU_STACKRESERVESIZE,
                                       FIBER_FLAG_FLOAT_SWITCH,
                                       clemu_wavefrontthread_proc,
                                       &m_groups[j].m_td_fibers[l*wf_sz + m]);

                    assert(m_groups[j].m_td_fibers[l*wf_sz + m].m_FIBER_id);

                    m_groups[j].m_td_fibers[l*wf_sz + m].m_FLS_index = 0; //FlsAlloc(0);
                    m_groups[j].m_td_fibers[l*wf_sz + m].m_group_ptr = &m_groups[j];
                }

            }
        }
#endif
        // replicate arguments
        // NOTE(review): an existing per-group array is reused whatever its
        // size; if an earlier call allocated fewer entries than m_nmb_arg,
        // the copy loop below writes past it - confirm the argument count is
        // stable across calls.
        if ( !m_groups[j].m_args && m_nmb_arg > 0 )
        {
            m_groups[j].m_nmb_arg = m_nmb_arg;
            m_groups[j].m_args = new ClKrnlArg[m_nmb_arg];
            memset(m_groups[j].m_args, 0, sizeof(ClKrnlArg) * m_nmb_arg);
        }

        // Bitwise copy: each group's arg_ptr therefore still points into the
        // job's m_args array, not into the group's own copy.
        for (int a = 0; a < m_nmb_arg; a++)
        {
            memcpy(&m_groups[j].m_args[a], &m_args[a], sizeof(ClKrnlArg));
        }
    }

    return(ret);
}