/// Converts the calling fiber back into a thread when this object is
/// destroyed.  The outcome of the conversion is only checked through
/// HPX_ASSERT; nothing is thrown or returned.
~prepare_main_thread() noexcept
{
    BOOL const converted = ConvertFiberToThread();
    HPX_ASSERT(converted != FALSE);
    // presumably silences "unused variable" diagnostics when HPX_ASSERT
    // expands to nothing -- confirm against the macro definition
    HPX_UNUSED(converted);
}
/// Converts the calling fiber back into a thread.
/// The BOOL result of ConvertFiberToThread() is not inspected.
void CoCleanup()
{
    static_cast<void>(ConvertFiberToThread());
}
int clemuGPU :: scheduleNextJob(void) { int ret = CL_EMU_SUCCESS; // TO DO : SELECT JOB // SELECT SE, SIMD clemuKernelJob *nextJob = m_job[0]; int cur_gbl_tid; clemuGPU* dev = (clemuGPU*)(nextJob->GetDev()); nextJob->m_next_active_group = 0; nextJob->m_fiber.m_FIBER_id = ConvertThreadToFiberEx(nextJob,FIBER_FLAG_FLOAT_SWITCH); nextJob->m_fiber.m_FLS_index = 0; // TO DO: NBR GROUPS > SIMDS // run for(int j = 0, grp_id = 0, wf_id = 0; j < nextJob->m_nbr_grpDim[1]; j++) { for(int i = 0; i < nextJob->m_nbr_grpDim[0]; i++, grp_id++, wf_id += nextJob->m_nbr_wavefronts) { int active_grp = nextJob->m_next_active_group; // activate if ( !nextJob->m_groups[active_grp].m_fiber.m_FIBER_id ) { nextJob->m_groups[active_grp].m_fiber.m_FIBER_id = CreateFiberEx( CLEMU_STACKCOMMITSIZE, CLEMU_STACKRESERVESIZE, FIBER_FLAG_FLOAT_SWITCH, clemu_groupthread_proc, &nextJob->m_groups[active_grp]); assert(nextJob->m_groups[active_grp].m_fiber.m_FIBER_id); nextJob->m_groups[active_grp].m_fiber.m_FLS_index = 0; } if ( !nextJob->m_groups[active_grp].m_td_fibers ) { nextJob->m_groups[active_grp].m_td_fibers = new CL_EMU_FIBER[nextJob->GetNbrInstances()]; assert(nextJob->m_groups[active_grp].m_td_fibers); memset(nextJob->m_groups[active_grp].m_td_fibers, 0, sizeof(CL_EMU_FIBER) * nextJob->GetNbrInstances()); } for(int l = 0, tid = 0; l < nextJob->GetNbrWavefronts(); l++) { int wf_sz = nextJob->GetRealWFSz(l); for(int m = 0; m < wf_sz; m++, tid++) { if ( !nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id ) { nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id = CreateFiberEx( CLEMU_STACKCOMMITSIZE, CLEMU_STACKRESERVESIZE, FIBER_FLAG_FLOAT_SWITCH, clemu_wavefrontthread_proc, &nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m]); assert(nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id); nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FLS_index = 0; //FlsAlloc(0); nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_group_ptr = 
&nextJob->m_groups[active_grp]; } } } nextJob->m_groups[active_grp].m_parent = nextJob; nextJob->m_groups[active_grp].m_grpCoord[0] = i; nextJob->m_groups[active_grp].m_grpCoord[1] = j; nextJob->m_groups[active_grp].m_grpCoord[2] = 0; nextJob->m_groups[active_grp].m_group_id = grp_id; nextJob->m_groups[active_grp].m_se_id = 0; nextJob->m_groups[active_grp].m_simd_id = active_grp % dev->GetNbrSIMD(); nextJob->m_groups[active_grp].m_first_hwwavefront = wf_id; cur_gbl_tid = grp_id * nextJob->m_actual_nbr_instances; nextJob->m_groups[active_grp].m_kernel = nextJob->GetKernel(); nextJob->m_groups[active_grp].m_endof_group = 0; // local memory setup // EXPLANATION: // The size comes either from invokation (argument) or from teh kernel declaration. // if it comes from declaration for teh first group there will be 0 sz which is max size. // second group will det the correect size and will be separated from teh first group int local_mem_sz = (nextJob->m_localmem_sz > clemuGetCompiledKernelMemSz(nextJob->m_groups[active_grp].m_kernel)) ? 
nextJob->m_localmem_sz : clemuGetCompiledKernelMemSz(nextJob->m_groups[active_grp].m_kernel); nextJob->m_groups[active_grp].m_localmem_sz = local_mem_sz; nextJob->m_groups[active_grp].m_localmem_ptr = (unsigned char*)dev->AssignLclMem(nextJob->m_groups[active_grp].m_simd_id,active_grp,local_mem_sz); if ( nextJob->m_groups[active_grp].m_nmb_arg > 0 ) { for( int l = 0; l < nextJob->m_groups[active_grp].m_nmb_arg; l++) { if ( (nextJob->m_groups[active_grp].m_args[l].m_flags & CL_ARG_LCL_MEM_SZ) == CL_ARG_LCL_MEM_SZ ) { nextJob->m_groups[active_grp].m_args[l].m_flags |= CL_ARG_LDS_PTR; nextJob->m_groups[active_grp].m_args[l].m_arg.parg = nextJob->m_groups[active_grp].m_localmem_ptr; break; } } } // nextJob->m_groups[grp_id].m_wf_fibers = new CL_EMU_FIBER[nextJob->m_nbr_wavefronts]; // assert(nextJob->m_groups[grp_id].m_wf_fibers); if ( !nextJob->m_groups[active_grp].m_td_fibers ) { nextJob->m_groups[active_grp].m_td_fibers = new CL_EMU_FIBER[nextJob->m_actual_nbr_instances]; assert(nextJob->m_groups[active_grp].m_td_fibers); } for(int l = 0, tid = 0; l < nextJob->m_nbr_wavefronts; l++) { int wf_sz = nextJob->GetRealWFSz(l); for(int m = 0; m < wf_sz; m++, tid++) { nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_grp_tid = l * wf_sz + m; nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_gbl_tid = cur_gbl_tid + nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_grp_tid; nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_wfid = l; nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_wf_tid = m; } } // schedule group nextJob->m_curgroup = active_grp; SwitchToFiber(nextJob->m_groups[active_grp].m_fiber.m_FIBER_id); // free if ( nextJob->m_groups[active_grp].m_td_fibers ) { for(int l = 0, tid = 0; l < nextJob->GetNbrWavefronts(); l++) { int wf_sz = nextJob->GetRealWFSz(l); for(int m = 0; m < wf_sz; m++, tid++) { if ( nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id ) { 
DeleteFiber(nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id); nextJob->m_groups[active_grp].m_td_fibers[l*wf_sz + m].m_FIBER_id = 0; } } } delete [] nextJob->m_groups[active_grp].m_td_fibers; nextJob->m_groups[active_grp].m_td_fibers = 0; } if ( nextJob->m_groups[active_grp].m_fiber.m_FIBER_id ) { DeleteFiber(nextJob->m_groups[active_grp].m_fiber.m_FIBER_id); nextJob->m_groups[active_grp].m_fiber.m_FIBER_id = 0; } nextJob->m_next_active_group = (active_grp + 1) % nextJob->m_nbr_active_group; } } nextJob->FreeJob(); ConvertFiberToThread(); nextJob->m_fiber.m_FIBER_id = 0; return(ret); }