// One-shot initializer: forwards `seed` to SetSeed() and then `dev` to
// SetDev(), in that order. Returns nothing.
void InitializeAll(unsigned seed, double dev)
{
    SetSeed(seed);
    SetDev(dev);
}
int clemuKernelJob :: initJob(void *_device, const char * _program_location, const char * _program_nm, const char*_kernel_entry_name, int _domainDim, int _domain[], int _group[], int _nmr_arg, ClKrnlArg _args[]) { int ret = CL_EMU_SUCCESS; int actual_nbr_instances = 1; clemuGPU *dev = (clemuGPU*)_device; int nbr_wavefronts; // TO AVOID MEM FAULT SetDev(dev); for( int i = 0; i < _domainDim; i++) { m_nbr_grpDim[i] = (_domain[i] + _group[i] - 1) / _group[i]; actual_nbr_instances *= _group[i]; } m_nbr_groups = m_nbr_grpDim[0] * m_nbr_grpDim[1] * m_nbr_grpDim[2]; nbr_wavefronts = (actual_nbr_instances + dev->GetWFSz() - 1) / dev->GetWFSz(); m_nmr_dim = _domainDim; m_grpDim[0] = m_grpDim[1] = m_grpDim[2] = 1; m_domainDim[0] = m_domainDim[1] = m_domainDim[2] = 1; for(int k = 0; k < _domainDim; k++) { m_grpDim[k] = _group[k]; m_domainDim[k] = m_nbr_grpDim[k] * _group[k]; } m_actual_nbr_instances = actual_nbr_instances; m_nbr_wavefronts = nbr_wavefronts; strcpy_s(m_kernel_name,_MAX_KERNEL_NAME,_kernel_entry_name); // BIND KERNEL m_kernel = clemuFindCompiledKernel(_program_location, _program_nm, _kernel_entry_name); m_args = 0; m_nmb_arg = _nmr_arg; m_localmem_sz = 0; if ( m_nmb_arg > 0 ) { m_args = new ClKrnlArg[m_nmb_arg]; assert(m_args); for( int l = 0; l < m_nmb_arg; l++) { m_args[l] = _args[l]; m_args[l].arg_ptr = &m_args[l].m_arg.iarg; if ( (m_args[l].m_flags & CL_ARG_LCL_MEM_SZ) == CL_ARG_LCL_MEM_SZ ) { m_localmem_sz = m_args[l].m_len; } } } m_last_wavesz = actual_nbr_instances % dev->GetWFSz(); m_last_wavesz = (m_last_wavesz == 0 ) ? 
dev->GetWFSz() : m_last_wavesz; assert(m_kernel); m_nbr_active_group = dev->GetNbrSIMD() * dev->GetNbrGroupsPerSIMD(); if ( !m_groups ) { m_groups = new clemuKernelGroup[m_nbr_active_group]; assert(m_groups); memset(m_groups, 0, sizeof(clemuKernelGroup)*m_nbr_active_group); } for(int j = 0; j < m_nbr_active_group; j++) { m_groups[j].m_parent = this; m_groups[j].m_kernel = m_kernel; #if 0 if ( !m_groups[j].m_fiber.m_FIBER_id ) { m_groups[j].m_fiber.m_FIBER_id = CreateFiberEx( CLEMU_STACKCOMMITSIZE, CLEMU_STACKRESERVESIZE, FIBER_FLAG_FLOAT_SWITCH, clemu_groupthread_proc, &m_groups[j]); assert(m_groups[j].m_fiber.m_FIBER_id); m_groups[j].m_fiber.m_FLS_index = 0; } if ( !m_groups[j].m_td_fibers ) { m_groups[j].m_td_fibers = new CL_EMU_FIBER[m_actual_nbr_instances]; assert(m_groups[j].m_td_fibers); memset(m_groups[j].m_td_fibers, 0, sizeof(CL_EMU_FIBER) * m_actual_nbr_instances); } for(int l = 0, tid = 0; l < m_nbr_wavefronts; l++) { int wf_sz = GetRealWFSz(l); for(int m = 0; m < wf_sz; m++, tid++) { if ( !m_groups[j].m_td_fibers[l*wf_sz + m].m_FIBER_id ) { m_groups[j].m_td_fibers[l*wf_sz + m].m_FIBER_id = CreateFiberEx( CLEMU_STACKCOMMITSIZE, CLEMU_STACKRESERVESIZE, FIBER_FLAG_FLOAT_SWITCH, clemu_wavefrontthread_proc, &m_groups[j].m_td_fibers[l*wf_sz + m]); assert(m_groups[j].m_td_fibers[l*wf_sz + m].m_FIBER_id); m_groups[j].m_td_fibers[l*wf_sz + m].m_FLS_index = 0; //FlsAlloc(0); m_groups[j].m_td_fibers[l*wf_sz + m].m_group_ptr = &m_groups[j]; } } } #endif // replicate arguments if ( !m_groups[j].m_args && m_nmb_arg > 0 ) { m_groups[j].m_nmb_arg = m_nmb_arg; m_groups[j].m_args = new ClKrnlArg[m_nmb_arg]; memset(m_groups[j].m_args, 0, sizeof(ClKrnlArg) * m_nmb_arg); } for( int l = 0; l < m_nmb_arg; l++) { memcpy(&m_groups[j].m_args[l], &m_args[l], sizeof(ClKrnlArg)); } } return(ret); }