/*
 * Map a work-group onto an Evergreen compute unit.
 *
 * Starting from slot 0, the first empty entry of compute_unit->work_groups[]
 * is chosen, stored in work_group->id_in_compute_unit, and the compute unit's
 * work_group_count is incremented. The compute unit is then removed from the
 * GPU 'ready' list and re-appended at its tail only if it still has a free
 * slot; it is appended to the 'busy' list if it is not already a member.
 * Finally, each wavefront of the work-group gets
 *   id_in_compute_unit = slot * wavefronts_per_work_group + id_in_work_group.
 *
 * Preconditions (asserted): the compute unit has fewer work-groups than
 * evg_gpu->work_groups_per_compute_unit, it is a member of the 'ready' list,
 * and work_group->id_in_compute_unit is 0 on entry.
 *
 * NOTE(review): the closing brace of this function is not visible in this
 * excerpt; the code below is left exactly as found.
 */
void evg_compute_unit_map_work_group(struct evg_compute_unit_t *compute_unit, struct evg_work_group_t *work_group) { struct evg_ndrange_t *ndrange = work_group->ndrange; struct evg_wavefront_t *wavefront; int wavefront_id; /* Map work-group */ assert(compute_unit->work_group_count < evg_gpu->work_groups_per_compute_unit); assert(!work_group->id_in_compute_unit); while (work_group->id_in_compute_unit < evg_gpu->work_groups_per_compute_unit && compute_unit->work_groups[work_group->id_in_compute_unit]) work_group->id_in_compute_unit++; assert(work_group->id_in_compute_unit < evg_gpu->work_groups_per_compute_unit); compute_unit->work_groups[work_group->id_in_compute_unit] = work_group; compute_unit->work_group_count++; /* If compute unit reached its maximum load, remove it from 'ready' list. * Otherwise, move it to the end of the 'ready' list. */ assert(DOUBLE_LINKED_LIST_MEMBER(evg_gpu, ready, compute_unit)); DOUBLE_LINKED_LIST_REMOVE(evg_gpu, ready, compute_unit); if (compute_unit->work_group_count < evg_gpu->work_groups_per_compute_unit) DOUBLE_LINKED_LIST_INSERT_TAIL(evg_gpu, ready, compute_unit); /* If this is the first scheduled work-group, insert to 'busy' list. */ if (!DOUBLE_LINKED_LIST_MEMBER(evg_gpu, busy, compute_unit)) DOUBLE_LINKED_LIST_INSERT_TAIL(evg_gpu, busy, compute_unit); /* Assign wavefronts identifiers in compute unit */ EVG_FOREACH_WAVEFRONT_IN_WORK_GROUP(work_group, wavefront_id) { wavefront = ndrange->wavefronts[wavefront_id]; wavefront->id_in_compute_unit = work_group->id_in_compute_unit * ndrange->wavefronts_per_work_group + wavefront->id_in_work_group; }
/*
 * Map a work-group onto a Southern Islands compute unit.
 *
 * Starting from slot 0, the first empty entry of compute_unit->work_groups[]
 * is chosen, stored in work_group->id_in_compute_unit, and the compute unit's
 * work_group_count is incremented. The compute unit is then removed from the
 * GPU 'compute_unit_ready' list and re-appended at its tail only if it still
 * has a free slot; it is appended to 'compute_unit_busy' if it is not already
 * a member. Finally, each wavefront of the work-group gets
 *   id_in_compute_unit = slot * wavefronts_per_work_group + id_in_work_group.
 *
 * Preconditions (asserted): the compute unit has fewer work-groups than
 * si_gpu->work_groups_per_compute_unit, it is a member of the
 * 'compute_unit_ready' list, and work_group->id_in_compute_unit is 0 on entry.
 *
 * NOTE(review): 'ib_id' is declared but not used in the visible text, and the
 * closing brace of this function is not visible in this excerpt; the code
 * below is left exactly as found.
 */
void si_compute_unit_map_work_group(struct si_compute_unit_t *compute_unit, struct si_work_group_t *work_group) { struct si_ndrange_t *ndrange = work_group->ndrange; struct si_wavefront_t *wavefront; int wavefront_id; int ib_id; assert(compute_unit->work_group_count < si_gpu->work_groups_per_compute_unit); assert(!work_group->id_in_compute_unit); /* Find an available slot */ while (work_group->id_in_compute_unit < si_gpu->work_groups_per_compute_unit && compute_unit->work_groups[work_group->id_in_compute_unit]) { work_group->id_in_compute_unit++; } assert(work_group->id_in_compute_unit < si_gpu->work_groups_per_compute_unit); compute_unit->work_groups[work_group->id_in_compute_unit] = work_group; compute_unit->work_group_count++; /* If compute unit reached its maximum load, remove it from * 'compute_unit_ready' list. Otherwise, move it to the end of * the 'compute_unit_ready' list. */ assert(DOUBLE_LINKED_LIST_MEMBER(si_gpu, compute_unit_ready, compute_unit)); DOUBLE_LINKED_LIST_REMOVE(si_gpu, compute_unit_ready, compute_unit); if (compute_unit->work_group_count < si_gpu->work_groups_per_compute_unit) { DOUBLE_LINKED_LIST_INSERT_TAIL(si_gpu, compute_unit_ready, compute_unit); } /* If this is the first scheduled work-group, insert to * 'compute_unit_busy' list. */ if (!DOUBLE_LINKED_LIST_MEMBER(si_gpu, compute_unit_busy, compute_unit)) { DOUBLE_LINKED_LIST_INSERT_TAIL(si_gpu, compute_unit_busy, compute_unit); } /* Assign wavefronts identifiers in compute unit */ SI_FOREACH_WAVEFRONT_IN_WORK_GROUP(work_group, wavefront_id) { wavefront = ndrange->wavefronts[wavefront_id]; wavefront->id_in_compute_unit = work_group->id_in_compute_unit * ndrange->wavefronts_per_work_group + wavefront->id_in_work_group; }
/*
 * Constructor for the Evergreen GPU object.
 *
 * Runs the Timing parent constructor, records the GPU frequency and creates
 * its esim frequency domain, allocates the trash uop list and the compute
 * unit array, creates a compute unit for every index visited by
 * EVG_GPU_FOREACH_COMPUTE_UNIT (each appended to the 'ready' list), and
 * finally installs the virtual function pointers.
 */
void EvgGpuCreate(EvgGpu *self)
{
	struct evg_compute_unit_t *cu;
	int cu_index;

	/* The parent class must be constructed before anything else */
	TimingCreate(asTiming(self));

	/* Clock frequency and the matching event-simulation domain */
	asTiming(self)->frequency = evg_gpu_frequency;
	asTiming(self)->frequency_domain = esim_new_domain(evg_gpu_frequency);

	/* Containers owned by the GPU */
	self->trash_uop_list = linked_list_create();
	self->compute_units = xcalloc(evg_gpu_num_compute_units, sizeof(void *));

	/* Create every compute unit, number it, and queue it as ready */
	EVG_GPU_FOREACH_COMPUTE_UNIT(cu_index)
	{
		cu = evg_compute_unit_create();
		cu->id = cu_index;
		self->compute_units[cu_index] = cu;
		DOUBLE_LINKED_LIST_INSERT_TAIL(self, ready, cu);
	}

	/* Virtual functions inherited from Object */
	asObject(self)->Dump = EvgGpuDump;

	/* Virtual functions inherited from Timing */
	asTiming(self)->DumpSummary = EvgGpuDumpSummary;
	asTiming(self)->Run = EvgGpuRun;
	asTiming(self)->MemConfigCheck = EvgGpuMemConfigCheck;
	asTiming(self)->MemConfigDefault = EvgGpuMemConfigDefault;
	asTiming(self)->MemConfigParseEntry = EvgGpuMemConfigParseEntry;
}
struct evg_ndrange_t *evg_ndrange_create(struct evg_opencl_kernel_t *kernel) { struct evg_ndrange_t *ndrange; /* Allocate */ ndrange = calloc(1, sizeof(struct evg_ndrange_t)); if (!ndrange) fatal("%s: out of memory", __FUNCTION__); /* Insert in ND-Range list of Evergreen emulator */ DOUBLE_LINKED_LIST_INSERT_TAIL(evg_emu, ndrange, ndrange); /* Name */ ndrange->name = strdup(kernel->name); if (!ndrange->name) fatal("%s: out of memory", __FUNCTION__); /* Initialize */ ndrange->kernel = kernel; ndrange->local_mem_top = kernel->func_mem_local; ndrange->id = evg_emu->ndrange_count++; /* Instruction histogram */ if (evg_emu_report_file) { ndrange->inst_histogram = calloc(EVG_INST_COUNT, sizeof(unsigned int)); if (!ndrange->inst_histogram) fatal("%s: out of memory", __FUNCTION__); } /* Return */ return ndrange; }
static void si_gpu_device_init() { struct si_compute_unit_t *compute_unit; int compute_unit_id; /* Create device */ si_gpu = calloc(1, sizeof(struct si_gpu_t)); if (!si_gpu) fatal("%s: out of memory", __FUNCTION__); /* Initialize */ si_gpu->trash_uop_list = linked_list_create(); /* Create compute units */ si_gpu->compute_units = calloc(si_gpu_num_compute_units, sizeof(void *)); if (!si_gpu->compute_units) fatal("%s: out of memory", __FUNCTION__); /* Initialize compute units */ SI_GPU_FOREACH_COMPUTE_UNIT(compute_unit_id) { si_gpu->compute_units[compute_unit_id] = si_compute_unit_create(); compute_unit = si_gpu->compute_units[compute_unit_id]; compute_unit->id = compute_unit_id; DOUBLE_LINKED_LIST_INSERT_TAIL(si_gpu, compute_unit_ready, compute_unit); } /* Trace */ si_trace_header("si.init version=\"%d.%d\" num_compute_units=%d\n", SI_TRACE_VERSION_MAJOR, SI_TRACE_VERSION_MINOR, si_gpu_num_compute_units); }
void frm_grid_set_status(struct frm_grid_t *grid, enum frm_grid_status_t status) { /* Get only the new bits */ status &= ~grid->status; /* Add ND-Range to lists */ if (status & frm_grid_pending) DOUBLE_LINKED_LIST_INSERT_TAIL(frm_emu, pending_grid, grid); if (status & frm_grid_running) DOUBLE_LINKED_LIST_INSERT_TAIL(frm_emu, running_grid, grid); if (status & frm_grid_finished) DOUBLE_LINKED_LIST_INSERT_TAIL(frm_emu, finished_grid, grid); /* Update it */ grid->status |= status; }
struct si_ndrange_t *si_ndrange_create(struct si_opencl_kernel_t *kernel) { struct si_ndrange_t *ndrange; /* Insert in ND-Range list of SouthernIslands emulator */ ndrange = xcalloc(1, sizeof(struct si_ndrange_t)); DOUBLE_LINKED_LIST_INSERT_TAIL(si_emu, ndrange, ndrange); /* Initialize */ ndrange->name = xstrdup(kernel->name); ndrange->kernel = kernel; ndrange->local_mem_top = kernel->func_mem_local; ndrange->id = si_emu->ndrange_count++; ndrange->num_vgprs = kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used; ndrange->num_sgprs = kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used; /* Create the UAV-to-physical-address lookup lists */ ndrange->uav_list = list_create(); /* Instruction histogram */ if (si_emu_report_file) ndrange->inst_histogram = xcalloc(SI_INST_COUNT, sizeof(unsigned int)); /* Return */ return ndrange; }
/*
 * Append 'stack' to the 'waiting' list of module 'mod' and record 'event'
 * in stack->waiting_list_event.
 *
 * Asserted: 'mod' is the stack's own module, and the stack is not already
 * in that waiting list.
 */
void mod_stack_wait_in_mod(struct mod_stack_t *stack, struct mod_t *mod, int event)
{
	assert(stack->mod == mod);
	assert(!DOUBLE_LINKED_LIST_MEMBER(mod, waiting, stack));

	DOUBLE_LINKED_LIST_INSERT_TAIL(mod, waiting, stack);
	stack->waiting_list_event = event;
}
/*
 * Append 'stack' to the 'waiting' list of port 'port' and record 'event'
 * in stack->waiting_list_event.
 *
 * Asserted: 'port' is the stack's own port, and the stack is not already
 * in that waiting list.
 */
void mod_stack_wait_in_port(struct mod_stack_t *stack, struct mod_port_t *port, int event)
{
	assert(stack->port == port);
	assert(!DOUBLE_LINKED_LIST_MEMBER(port, waiting, stack));

	DOUBLE_LINKED_LIST_INSERT_TAIL(port, waiting, stack);
	stack->waiting_list_event = event;
}
void si_work_group_set_status(struct si_work_group_t *work_group, enum si_work_group_status_t status) { struct si_ndrange_t *ndrange = work_group->ndrange; /* Get only the new bits */ status &= ~work_group->status; /* Add work-group to lists */ if (status & si_work_group_pending) DOUBLE_LINKED_LIST_INSERT_TAIL(ndrange, pending, work_group); if (status & si_work_group_running) DOUBLE_LINKED_LIST_INSERT_TAIL(ndrange, running, work_group); if (status & si_work_group_finished) DOUBLE_LINKED_LIST_INSERT_TAIL(ndrange, finished, work_group); /* Update it */ work_group->status |= status; }
/*
 * Append 'stack' to the 'waiting' list of another stack, 'master_stack',
 * and record 'event' in stack->waiting_list_event.
 *
 * Asserted: a stack never waits on itself, and 'stack' is not already in
 * the master's waiting list.
 */
void mod_stack_wait_in_stack(struct mod_stack_t *stack, struct mod_stack_t *master_stack, int event)
{
	assert(stack != master_stack);
	assert(!DOUBLE_LINKED_LIST_MEMBER(master_stack, waiting, stack));

	DOUBLE_LINKED_LIST_INSERT_TAIL(master_stack, waiting, stack);
	stack->waiting_list_event = event;
}
/*
 * Append 'stack' to the 'waiting' list of another stack, 'master_stack',
 * and record 'event' in stack->waiting_list_event.
 *
 * Asserted: a stack never waits on itself, and 'stack' is not already in
 * the master's waiting list.
 *
 * A leftover debug conditional (an address-range test guarding an empty
 * statement, with its fprintf commented out) used to follow the insertion;
 * it had no effect and has been removed.
 */
void mod_stack_wait_in_stack(struct mod_stack_t *stack, struct mod_stack_t *master_stack, int event)
{
	assert(master_stack != stack);
	assert(!DOUBLE_LINKED_LIST_MEMBER(master_stack, waiting, stack));

	stack->waiting_list_event = event;
	DOUBLE_LINKED_LIST_INSERT_TAIL(master_stack, waiting, stack);
}
/*
 * Create a grid for CUDA function 'function'.
 *
 * The new object is zero-initialized, appended to the emulator's 'grid'
 * list, given ID 0, and named after the function.
 *
 * The name is copied with room reserved for the terminator: strncpy() does
 * not NUL-terminate when the source is MAX_STRING_SIZE characters or longer,
 * so at most MAX_STRING_SIZE - 1 characters are copied and the last byte is
 * set to '\0' explicitly. Longer names are truncated.
 *
 * Returns the newly created grid.
 */
struct frm_grid_t *frm_grid_create(struct frm_cuda_function_t *function)
{
	struct frm_grid_t *grid;

	/* Initialize */
	grid = xcalloc(1, sizeof(struct frm_grid_t));
	DOUBLE_LINKED_LIST_INSERT_TAIL(frm_emu, grid, grid);
	grid->id = 0;
	grid->function = function;

	/* Bounded, always-terminated copy of the function name */
	strncpy(grid->name, function->name, MAX_STRING_SIZE - 1);
	grid->name[MAX_STRING_SIZE - 1] = '\0';

	/* Return */
	return grid;
}
void x86_emu_list_insert_tail(enum x86_emu_list_kind_t list, struct x86_ctx_t *ctx) { assert(!x86_emu_list_member(list, ctx)); switch (list) { case x86_emu_list_context: DOUBLE_LINKED_LIST_INSERT_TAIL(x86_emu, context, ctx); break; case x86_emu_list_running: DOUBLE_LINKED_LIST_INSERT_TAIL(x86_emu, running, ctx); break; case x86_emu_list_finished: DOUBLE_LINKED_LIST_INSERT_TAIL(x86_emu, finished, ctx); break; case x86_emu_list_zombie: DOUBLE_LINKED_LIST_INSERT_TAIL(x86_emu, zombie, ctx); break; case x86_emu_list_suspended: DOUBLE_LINKED_LIST_INSERT_TAIL(x86_emu, suspended, ctx); break; case x86_emu_list_alloc: DOUBLE_LINKED_LIST_INSERT_TAIL(x86_emu, alloc, ctx); break; } }
/*
 * Create a grid for CUDA function 'function'.
 *
 * The new object is zero-initialized (fatal() on allocation failure),
 * appended to the emulator's 'grid' list, given ID 0, and named after the
 * function.
 *
 * The name is copied with room reserved for the terminator: strncpy() does
 * not NUL-terminate when the source is MAX_STRING_SIZE characters or longer,
 * so at most MAX_STRING_SIZE - 1 characters are copied and the last byte is
 * set to '\0' explicitly. Longer names are truncated.
 *
 * Returns the newly created grid.
 */
struct frm_grid_t *frm_grid_create(struct frm_cuda_function_t *function)
{
	struct frm_grid_t *grid;

	/* Allocate */
	grid = calloc(1, sizeof(struct frm_grid_t));
	if (!grid)
		fatal("%s: out of memory", __FUNCTION__);

	/* Initialize */
	DOUBLE_LINKED_LIST_INSERT_TAIL(frm_emu, grid, grid);
	grid->id = 0;
	grid->function = function;

	/* Bounded, always-terminated copy of the function name */
	strncpy(grid->name, function->name, MAX_STRING_SIZE - 1);
	grid->name[MAX_STRING_SIZE - 1] = '\0';

	/* Return */
	return grid;
}
/*
 * Append context 'ctx' to the tail of the list of emulator 'self' selected
 * by 'list'. Asserts that the context is not already a member of that list.
 * A 'list' value matching none of the cases below inserts nothing.
 */
void MIPSEmuListInsertTail(MIPSEmu *self, enum mips_emu_list_kind_t list, struct mips_ctx_t *ctx)
{
	/* A context may appear at most once in a given list */
	assert(!MIPSEmuListMember(self, list, ctx));

	switch (list)
	{
	case mips_emu_list_context:
		DOUBLE_LINKED_LIST_INSERT_TAIL(self, context, ctx);
		return;

	case mips_emu_list_running:
		DOUBLE_LINKED_LIST_INSERT_TAIL(self, running, ctx);
		return;

	case mips_emu_list_finished:
		DOUBLE_LINKED_LIST_INSERT_TAIL(self, finished, ctx);
		return;

	case mips_emu_list_zombie:
		DOUBLE_LINKED_LIST_INSERT_TAIL(self, zombie, ctx);
		return;

	case mips_emu_list_suspended:
		DOUBLE_LINKED_LIST_INSERT_TAIL(self, suspended, ctx);
		return;

	case mips_emu_list_alloc:
		DOUBLE_LINKED_LIST_INSERT_TAIL(self, alloc, ctx);
		return;
	}
}
struct si_ndrange_t *si_ndrange_create(char *name) { struct si_ndrange_t *ndrange; /* Initialize */ ndrange = xcalloc(1, sizeof(struct si_ndrange_t)); ndrange->id = si_emu->ndrange_count++; /* Insert in ND-Range list of SouthernIslands emulator */ DOUBLE_LINKED_LIST_INSERT_TAIL(si_emu, ndrange, ndrange); /* Instruction histogram */ if (si_emu_report_file) ndrange->inst_histogram = xcalloc(SI_INST_COUNT, sizeof(unsigned int)); /* Return */ return ndrange; }
struct si_ndrange_t *si_ndrange_create(struct si_opencl_kernel_t *kernel) { struct si_ndrange_t *ndrange; /* Allocate */ ndrange = calloc(1, sizeof(struct si_ndrange_t)); if (!ndrange) fatal("%s: out of memory", __FUNCTION__); /* Insert in ND-Range list of SouthernIslands emulator */ DOUBLE_LINKED_LIST_INSERT_TAIL(si_emu, ndrange, ndrange); /* Name */ ndrange->name = strdup(kernel->name); if (!ndrange->name) fatal("%s: out of memory", __FUNCTION__); /* Initialize */ ndrange->kernel = kernel; ndrange->local_mem_top = kernel->func_mem_local; ndrange->id = si_emu->ndrange_count++; /* Create the UAV-to-physical-address lookup lists */ ndrange->uav_list = list_create(); /* Instruction histogram */ if (evg_emu_report_file) { ndrange->inst_histogram = calloc(EVG_INST_COUNT, sizeof(unsigned int)); if (!ndrange->inst_histogram) fatal("%s: out of memory", __FUNCTION__); } /* Return */ return ndrange; }
void frm_grid_setup_threads(struct frm_grid_t *grid) { struct frm_cuda_function_t *function = grid->function; struct frm_threadblock_t *threadblock; struct frm_warp_t *warp; struct frm_thread_t *thread; int bidx, bidy, bidz; /* 3D threadblock ID iterators */ int lidx, lidy, lidz; /* 3D thread local ID iterators */ int tid; /* Global ID iterator */ int bid; /* Threadblock ID iterator */ int wid; /* Warp ID iterator */ int lid; /* Local ID iterator */ /* Array of threadblocks */ grid->threadblock_count = function->group_count; grid->threadblock_id_first = 0; grid->threadblock_id_last = grid->threadblock_count - 1; grid->threadblocks = calloc(grid->threadblock_count, sizeof(void *)); for (bid = 0; bid < grid->threadblock_count; bid++) grid->threadblocks[bid] = frm_threadblock_create(); /* Array of warps */ grid->warps_per_threadblock = (function->local_size + frm_emu_warp_size - 1) / frm_emu_warp_size; grid->warp_count = grid->warps_per_threadblock * grid->threadblock_count; grid->warp_id_first = 0; grid->warp_id_last = grid->warp_count - 1; assert(grid->warps_per_threadblock > 0 && grid->warp_count > 0); grid->warps = calloc(grid->warp_count, sizeof(void *)); for (wid = 0; wid < grid->warp_count; wid++) { bid = wid / grid->warps_per_threadblock; grid->warps[wid] = frm_warp_create(); warp = grid->warps[wid]; threadblock = grid->threadblocks[bid]; warp->id = wid; warp->id_in_threadblock = wid % grid->warps_per_threadblock; warp->grid = grid; warp->threadblock = threadblock; DOUBLE_LINKED_LIST_INSERT_TAIL(threadblock, running, warp); } /* Array of threads */ grid->thread_count = function->global_size; grid->thread_id_first = 0; grid->thread_id_last = grid->thread_count - 1; grid->threads = calloc(grid->thread_count, sizeof(void *)); tid = 0; bid = 0; for (bidz = 0; bidz < function->group_count3[2]; bidz++) { for (bidy = 0; bidy < function->group_count3[1]; bidy++) { for (bidx = 0; bidx < function->group_count3[0]; bidx++) { /* Assign threadblock ID */ threadblock = 
grid->threadblocks[bid]; threadblock->grid = grid; threadblock->id_3d[0] = bidx; threadblock->id_3d[1] = bidy; threadblock->id_3d[2] = bidz; threadblock->id = bid; frm_threadblock_set_status(threadblock, frm_threadblock_pending); /* First, last, and number of threads in threadblock */ threadblock->thread_id_first = tid; threadblock->thread_id_last = tid + function->local_size - 1; threadblock->thread_count = function->local_size; threadblock->threads = &grid->threads[tid]; snprintf(threadblock->name, sizeof(threadblock->name), "threadblock[i%d-i%d]", threadblock->thread_id_first, threadblock->thread_id_last); /* First ,last, and number of warps in threadblock */ threadblock->warp_id_first = bid * grid->warps_per_threadblock; threadblock->warp_id_last = threadblock->warp_id_first + grid->warps_per_threadblock - 1; threadblock->warp_count = grid->warps_per_threadblock; threadblock->warps = &grid->warps[threadblock->warp_id_first]; /* Iterate through threads */ lid = 0; for (lidz = 0; lidz < function->local_size3[2]; lidz++) { for (lidy = 0; lidy < function->local_size3[1]; lidy++) { for (lidx = 0; lidx < function->local_size3[0]; lidx++) { /* Warp ID */ wid = bid * grid->warps_per_threadblock + lid / frm_emu_warp_size; assert(wid < grid->warp_count); warp = grid->warps[wid]; /* Create thread */ grid->threads[tid] = frm_thread_create(); thread = grid->threads[tid]; thread->grid = grid; /* Global IDs */ thread->id_3d[0] = bidx * function->local_size3[0] + lidx; thread->id_3d[1] = bidy * function->local_size3[1] + lidy; thread->id_3d[2] = bidz * function->local_size3[2] + lidz; thread->id = tid; /* Local IDs */ thread->id_in_threadblock_3d[0] = lidx; thread->id_in_threadblock_3d[1] = lidy; thread->id_in_threadblock_3d[2] = lidz; thread->id_in_threadblock = lid; /* Other */ thread->id_in_warp = thread->id_in_threadblock % frm_emu_warp_size; thread->threadblock = grid->threadblocks[bid]; thread->warp = grid->warps[wid]; /* First, last, and number of threads in warp */ if 
(!warp->thread_count) { warp->thread_id_first = tid; warp->threads = &grid->threads[tid]; } warp->thread_count++; warp->thread_id_last = tid; bit_map_set(warp->active_stack, thread->id_in_warp, 1, 1); /* Save local IDs in register R0 */ thread->sr[FRM_SR_Tid_X].v.i = lidx; /* R0.x */ thread->sr[FRM_SR_Tid_Y].v.i = lidy; /* R0.y */ thread->sr[FRM_SR_Tid_Z].v.i = lidz; /* R0.z */ /* Save threadblock IDs in register R1 */ thread->sr[FRM_SR_CTAid_X].v.i = bidx; /* R1.x */ thread->sr[FRM_SR_CTAid_Y].v.i = bidy; /* R1.y */ thread->sr[FRM_SR_CTAid_Z].v.i = bidz; /* R1.z */ /* Next thread */ tid++; lid++; } } } /* Next threadblock */ bid++; } } } /* Assign names to warps */ for (wid = 0; wid < grid->warp_count; wid++) { warp = grid->warps[wid]; snprintf(warp->name, sizeof(warp->name), "warp[i%d-i%d]", warp->thread_id_first, warp->thread_id_last); /* Initialize warp program counter */ warp->buf_start = function->function_buffer.ptr; warp->buf = warp->buf_start; warp->buf_size = function->function_buffer.size; } /* Debug */ printf("local_size = %d (%d,%d,%d)\n", function->local_size, function->local_size3[0], function->local_size3[1], function->local_size3[2]); printf("global_size = %d (%d,%d,%d)\n", function->global_size, function->global_size3[0], function->global_size3[1], function->global_size3[2]); printf("group_count = %d (%d,%d,%d)\n", function->group_count, function->group_count3[0], function->group_count3[1], function->group_count3[2]); printf("warp_count = %d\n", grid->warp_count); printf("warps_per_threadblock = %d\n", grid->warps_per_threadblock); printf(" tid tid2 tid1 tid0 bid bid2 bid1 bid0 lid lid2 lid1 lid0 warp work-group\n"); for (tid = 0; tid < grid->thread_count; tid++) { thread = grid->threads[tid]; warp = thread->warp; threadblock = thread->threadblock; printf("%4d %4d %4d %4d ", thread->id, thread->id_3d[2], thread->id_3d[1], thread->id_3d[0]); printf("%4d %4d %4d %4d ", threadblock->id, threadblock->id_3d[2], threadblock->id_3d[1], 
threadblock->id_3d[0]); printf("%4d %4d %4d %4d ", thread->id_in_threadblock, thread->id_in_threadblock_3d[2], thread->id_in_threadblock_3d[1], thread->id_in_threadblock_3d[0]); printf("%20s.%-4d ", warp->name, thread->id_in_warp); printf("%20s.%-4d\n", threadblock->name, thread->id_in_threadblock); } }
static void si_ndrange_setup_arrays(struct si_ndrange_t *ndrange) { struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /* Array of work-groups */ ndrange->work_group_count = ndrange->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = xcalloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < ndrange->group_count; gid++) { ndrange->work_groups[gid] = si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (ndrange->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = xcalloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = xcalloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = ndrange->scalar_work_items[wid]; 
ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } /* Array of work-items */ ndrange->work_item_count = ndrange->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = xcalloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < ndrange->group_count3[2]; gidz++) { for (gidy = 0; gidy < ndrange->group_count3[1]; gidy++) { for (gidx = 0; gidx < ndrange->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange = ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + ndrange->local_size; work_group->work_item_count = ndrange->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < ndrange->local_size3[2]; lidz++) { for (lidy = 0; lidy < ndrange->local_size3[1]; lidy++) { for (lidx = 0; lidx < ndrange->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < ndrange->wavefront_count); 
wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * ndrange->local_size3[0] + lidx; work_item->id_3d[1] = gidy * ndrange->local_size3[1] + lidy; work_item->id_3d[2] = gidz * ndrange->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /*MIAOW start*/ work_item->id = work_item->id_in_wavefront; /*MIAOW stop*/ /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); } /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", ndrange->local_size, ndrange->local_size3[0], ndrange->local_size3[1], ndrange->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", ndrange->global_size, ndrange->global_size3[0], ndrange->global_size3[1], ndrange->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", ndrange->group_count, ndrange->group_count3[0], ndrange->group_count3[1], ndrange->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); 
si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug("\n"); }
void si_ndrange_setup_work_items(struct si_ndrange_t *ndrange) { struct si_opencl_kernel_t *kernel = ndrange->kernel; struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /* Array of work-groups */ ndrange->work_group_count = kernel->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = calloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < kernel->group_count; gid++) { ndrange->work_groups[gid] = si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (kernel->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = calloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = calloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = 
ndrange->scalar_work_items[wid]; ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } /* Array of work-items */ ndrange->work_item_count = kernel->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = calloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < kernel->group_count3[2]; gidz++) { for (gidy = 0; gidy < kernel->group_count3[1]; gidy++) { for (gidx = 0; gidx < kernel->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange = ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + kernel->local_size; work_group->work_item_count = kernel->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < kernel->local_size3[2]; lidz++) { for (lidy = 0; lidy < kernel->local_size3[1]; lidy++) { for (lidx = 0; lidx < kernel->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < 
ndrange->wavefront_count); wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * kernel->local_size3[0] + lidx; work_item->id_3d[1] = gidy * kernel->local_size3[1] + lidy; work_item->id_3d[2] = gidz * kernel->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; /* Save local IDs in registers */ work_item->vreg[0].as_int = lidx; /* V0 */ work_item->vreg[1].as_int = lidy; /* V1 */ work_item->vreg[2].as_int = lidz; /* V2 */ /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); /* Initialize wavefront program counter */ if (!kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size) fatal("%s: cannot load kernel code", __FUNCTION__); wavefront->inst_buf_start = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; wavefront->inst_buf = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; /* Save work-group IDs in registers */ unsigned int 
user_sgpr = kernel->bin_file->enc_dict_entry_southern_islands->compute_pgm_rsrc2->user_sgpr; wavefront->sreg[user_sgpr].as_int = wavefront->work_group->id_3d[0]; wavefront->sreg[user_sgpr + 1].as_int = wavefront->work_group->id_3d[1]; wavefront->sreg[user_sgpr + 2].as_int = wavefront->work_group->id_3d[2]; /* Initialize Constant Buffers */ unsigned int userElementCount = kernel->bin_file->enc_dict_entry_southern_islands->userElementCount; struct si_bin_enc_user_element_t* userElements = kernel->bin_file->enc_dict_entry_southern_islands->userElements; for (int i = 0; i < userElementCount; i++) { if (userElements[i].dataClass == IMM_CONST_BUFFER) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == IMM_UAV) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == PTR_CONST_BUFFER_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else if (userElements[i].dataClass == PTR_UAV_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else { fatal("Unimplemented User Element: dataClass:%d", userElements[i].dataClass); } } /* Initialize the execution mask */ wavefront->sreg[SI_EXEC].as_int = 0xFFFFFFFF; wavefront->sreg[SI_EXEC + 1].as_int = 0xFFFFFFFF; wavefront->sreg[SI_EXECZ].as_int = 0; } /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", kernel->local_size, kernel->local_size3[0], kernel->local_size3[1], kernel->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", kernel->global_size, kernel->global_size3[0], kernel->global_size3[1], kernel->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", kernel->group_count, kernel->group_count3[0], kernel->group_count3[1], 
kernel->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug(" tid tid2 tid1 tid0 gid gid2 gid1 gid0 lid lid2 lid1 lid0 wavefront work-group\n"); for (tid = 0; tid < ndrange->work_item_count; tid++) { work_item = ndrange->work_items[tid]; wavefront = work_item->wavefront; work_group = work_item->work_group; si_isa_debug("%4d %4d %4d %4d ", work_item->id, work_item->id_3d[2], work_item->id_3d[1], work_item->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_group->id, work_group->id_3d[2], work_group->id_3d[1], work_group->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_item->id_in_work_group, work_item->id_in_work_group_3d[2], work_item->id_in_work_group_3d[1], work_item->id_in_work_group_3d[0]); si_isa_debug("%20s.%-4d ", wavefront->name, work_item->id_in_wavefront); si_isa_debug("%20s.%-4d\n", work_group->name, work_item->id_in_work_group); } }
void si_ndrange_setup_work_items(struct si_ndrange_t *ndrange) { struct si_opencl_kernel_t *kernel = ndrange->kernel; struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /*MIAOW start */ char config_str[100]; sprintf(config_str, "config_%d.txt", kernel_config_count); FILE* config = fopen(config_str, "w"); /*MIAOW stop */ /*MIAOW start*/ //UNIT TEST char unit_test_input_buf[150000]; char *tok = NULL; char *config_read_result = NULL; char vreg_str[64][2500]; char sreg_str[2500]; FILE* unit_test_config = fopen("unit_test_config.txt", "r"); if (unit_test_config != 0) { int i; int num_of_threads = 0; //ndrange->wavefront_count = 1; //kernel->group_count = 1; kernel->local_size3[2] = 1; kernel->local_size3[1] = 1; kernel->global_size3[2] = 1; kernel->global_size3[1] = 1; config_read_result = fgets(unit_test_input_buf, 150000, unit_test_config); if(config_read_result != NULL) { tok = strtok(unit_test_input_buf, ";"); //WG count kernel->group_count = atoi(tok); tok = strtok(NULL, ";"); //total number of threads num_of_threads = atoi(tok); kernel->global_size = atoi(tok); kernel->global_size3[0] = atoi(tok); kernel->local_size3[0] = atoi(tok); kernel->local_size = atoi(tok); } } //WorkGroup count and thread count fprintf(config,"%d;%d;\n", kernel->group_count, kernel->global_size); #ifdef MIAOW_DEBUG fflush(config); #endif /*MIAOW stop*/ /* Array of work-groups */ ndrange->work_group_count = kernel->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = xcalloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < kernel->group_count; gid++) { ndrange->work_groups[gid] = 
si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (kernel->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = xcalloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = xcalloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = ndrange->scalar_work_items[wid]; ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } #ifdef MIAOW_DEBUG fprintf(config, "Processing Workitems\n"); fflush(config); #endif /* Array of work-items */ ndrange->work_item_count = kernel->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = xcalloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < kernel->group_count3[2]; gidz++) { for (gidy = 0; gidy < kernel->group_count3[1]; gidy++) { for (gidx = 0; gidx < kernel->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange 
= ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + kernel->local_size; work_group->work_item_count = kernel->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < kernel->local_size3[2]; lidz++) { for (lidy = 0; lidy < kernel->local_size3[1]; lidy++) { for (lidx = 0; lidx < kernel->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < ndrange->wavefront_count); wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * kernel->local_size3[0] + lidx; work_item->id_3d[1] = gidy * kernel->local_size3[1] + lidy; work_item->id_3d[2] = gidz * kernel->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = 
ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /*MIAOW start*/ work_item->id = work_item->id_in_wavefront; /*MIAOW stop*/ /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; //Initializing all vreg values to zero, so that config.txt doesnt change with each run /*MIAOW start*/ for (int vreg_init_index; vreg_init_index < 256; vreg_init_index++) { work_item->vreg[vreg_init_index].as_int = 0; } /*MIAOW stop*/ /* Save local IDs in registers */ work_item->vreg[0].as_int = lidx; /* V0 */ work_item->vreg[1].as_int = lidy; /* V1 */ work_item->vreg[2].as_int = lidz; /* V2 */ /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /*MIAOW start */ //This part is for unit test trace generation. //If the file unit_test_instr.mem is present, the contents will be read and placed in the instruction buffer. FILE* unit_test_instr = fopen("unit_test_instr.mem", "r"); if (unit_test_instr != 0) { unsigned char instr_buf[200]; int input_instr_count = 0; fgets(instr_buf, 200, unit_test_instr); //address unsigned char* buf_ptr = (unsigned char*)kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; while (fgets(instr_buf, 200, unit_test_instr) != NULL) { instr_buf[2] = '\0'; //interested only in first byte. 
unsigned char cur_instr = (unsigned char)strtol(instr_buf, 0, 16); buf_ptr[input_instr_count++] = cur_instr; } kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size = input_instr_count; fclose(unit_test_instr); } /*MIAOW stop */ /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); /* Initialize wavefront program counter */ if (!kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size) fatal("%s: cannot load kernel code", __FUNCTION__); wavefront->wavefront_pool_start = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; wavefront->wavefront_pool = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; //Initializing all sreg values to zero, so that config.txt doesnt change with each run /*MIAOW start*/ for (int sreg_init_index; sreg_init_index < 256; sreg_init_index++) { //wavefront->sreg[sreg_init_index].as_int = 0; } /*MIAOW stop*/ /* Save work-group IDs in registers */ unsigned int user_sgpr = kernel->bin_file-> enc_dict_entry_southern_islands->compute_pgm_rsrc2->user_sgpr; wavefront->sreg[user_sgpr].as_int = wavefront->work_group->id_3d[0]; wavefront->sreg[user_sgpr + 1].as_int = wavefront->work_group->id_3d[1]; wavefront->sreg[user_sgpr + 2].as_int = wavefront->work_group->id_3d[2]; /* Initialize Constant Buffers */ unsigned int userElementCount = kernel->bin_file->enc_dict_entry_southern_islands->userElementCount; struct si_bin_enc_user_element_t* userElements = kernel->bin_file->enc_dict_entry_southern_islands->userElements; for (int i = 0; i < userElementCount; i++) { if (userElements[i].dataClass == IMM_CONST_BUFFER) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else 
if (userElements[i].dataClass == IMM_UAV) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == PTR_CONST_BUFFER_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else if (userElements[i].dataClass == PTR_UAV_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else { fatal("Unimplemented User Element: dataClass:%d", userElements[i].dataClass); } } //MIAOW m2s is not setting exec mask properly /* Initialize the execution mask */ //wavefront->sreg[SI_EXEC].as_int = 0xFFFFFFFF; //wavefront->sreg[SI_EXEC + 1].as_int = 0xFFFFFFFF; //wavefront->sreg[SI_EXECZ].as_int = 0; /*MIAOW start*/ //EXEC Mask init unsigned long long mask; if(wavefront->work_item_count == 64) { mask = 0xFFFFFFFFFFFFFFFF; } else { mask = powl(2, wavefront->work_item_count) - 1; } wavefront->sreg[SI_EXEC].as_uint = (unsigned int)mask; wavefront->sreg[SI_EXEC + 1].as_uint = mask>>32; wavefront->sreg[SI_EXECZ].as_int = 0; /*MIAOW stop*/ /*MIAOW start*/ if(config_read_result != NULL) { if(NULL != fgets(unit_test_input_buf, 150000, unit_test_config)) { int num_of_threads = 0; int thread_init_count = 0; tok = strtok(unit_test_input_buf, ";"); //WGID tok = strtok(NULL, ";"); //WFID tok = strtok(NULL, ";"); //WF count tok = strtok(NULL, ";"); //thread count num_of_threads = atoi(tok); #ifdef MIAOW_DEBUG if (num_of_threads != wavefront->work_item_count) { fprintf(config, "num_thread MISMATCH %d!=%d\n", num_of_threads, wavefront->work_item_count); } else { fprintf(config, "num_thread match %d=%d\n", num_of_threads, wavefront->work_item_count); } fflush(config); #endif tok = strtok(NULL, ";"); //VREG size kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used = atoi(tok); tok = strtok(NULL, ";"); //SREG size 
kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used = atoi(tok); tok = strtok(NULL, ";"); //LDS size kernel->bin_file->enc_dict_entry_southern_islands->lds_size_used = atoi(tok); for(thread_init_count = 0; thread_init_count < num_of_threads; thread_init_count++) { tok = strtok(NULL, ";"); strcpy((char*)vreg_str[thread_init_count], tok); assert(vreg_str[thread_init_count][0] == 'V'); } tok = strtok(NULL, ";"); strcpy((char*)sreg_str, tok); assert(sreg_str[0] == 'S'); tok = strtok(NULL, ";"); //PC } #ifdef MIAOW_DEBUG fprintf(config, "Initializing VREG \n"); fflush(config); #endif //VREG value init int wi_init_count = 0; for (wi_init_count = 0; wi_init_count < wavefront->work_item_count; wi_init_count++) { if (wavefront->work_items != NULL) { int vreg_init_count = 0; char *reg_tok; struct si_work_item_t* wi = wavefront->work_items[wi_init_count]; reg_tok = strtok(vreg_str[wi_init_count], ":"); reg_tok = strtok(NULL, "="); for(vreg_init_count = 0; reg_tok != NULL; vreg_init_count++) { int vreg_index = atoi(reg_tok); reg_tok = strtok(NULL, ","); assert(reg_tok != NULL); wi->vreg[vreg_index].as_int = atoi(reg_tok); reg_tok = strtok(NULL, "="); } // make sure that all reg values were read assert(reg_tok == NULL); } } #ifdef MIAOW_DEBUG fprintf(config, "Initializing SREG \n"); fflush(config); #endif #ifdef MIAOW_DEBUG fprintf(config, "mask: %lld \n", mask); fprintf(config, "MASK HI: %u \n", wavefront->sreg[SI_EXEC + 1].as_uint); fprintf(config, "MASK LO: %u \n", wavefront->sreg[SI_EXEC].as_uint); fflush(config); #endif //SREG value init int sreg_init_count = 0; char *sreg_tok; sreg_tok = strtok(sreg_str, ":"); sreg_tok = strtok(NULL, "="); for(sreg_init_count=0; sreg_tok != NULL; sreg_init_count++) { int sreg_index = atoi(sreg_tok); sreg_tok = strtok(NULL, ","); assert(sreg_tok != NULL); wavefront->sreg[sreg_index].as_int = atoi(sreg_tok); sreg_tok = strtok(NULL, "="); } // make sure that all reg values were read assert(sreg_tok == NULL); } /*MIAOW stop*/ 
/*MIAOW start*/ //WorkGroup ID fprintf(config,"%d;",wavefront->work_group->id); //Wavefront ID fprintf(config,"%d;",wavefront->id_in_work_group); //Wavefront Count fprintf(config,"%d;",wavefront->work_group->wavefront_count); //Thread count fprintf(config,"%d;",wavefront->work_item_count); //VGPR size, SGPR size, LDS size fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used); fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used); fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->lds_size_used); #ifdef MIAOW_DEBUG fflush(config); #endif int wi_count = 0; for (wi_count = 0; wi_count < wavefront->work_item_count; wi_count++) { //VGPR initial values if (wavefront->work_items != NULL) { struct si_work_item_t* wi = wavefront->work_items[wi_count]; fprintf(config,"V:"); int vgpr_count = 0; for (vgpr_count = 0; vgpr_count < (kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used - 1); vgpr_count++) { //All VGPR values except the last fprintf(config,"%d=%d,", vgpr_count, wi->vreg[vgpr_count]); } //Last SGPR value fprintf(config,"%d=%d;", vgpr_count, wi->vreg[vgpr_count]); } } //SGPR initial values fprintf(config,"S:"); int sgpr_count = 0; for (sgpr_count = 0; sgpr_count < (kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used - 1); sgpr_count++) { //All SGPR values except the last fprintf(config,"%d=%d,", sgpr_count, wavefront->sreg[sgpr_count]); } //Last SGPR value fprintf(config,"%d=%d;", sgpr_count, wavefront->sreg[sgpr_count]); //PC start //fprintf(config,"%d",wavefront->wavefront_pool_start); fprintf(config, "0"); fprintf(config,"\n"); /*MIAOW stop*/ } /*MIAOW start */ fclose(config); char instr_str[100]; sprintf(instr_str, "instr_%d.mem", kernel_config_count); FILE* instr = fopen(instr_str, "w"); //fprintf(instr, "@%.8x\n", kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr); fprintf(instr, "@0\n"); for (int instr_count = 0; instr_count < 
kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size; instr_count++) { fprintf(instr, "%.2x\n", ((unsigned char*)kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr)[instr_count]); } fclose(instr); /*MIAOW stop */ /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", kernel->local_size, kernel->local_size3[0], kernel->local_size3[1], kernel->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", kernel->global_size, kernel->global_size3[0], kernel->global_size3[1], kernel->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", kernel->group_count, kernel->group_count3[0], kernel->group_count3[1], kernel->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug(" tid tid2 tid1 tid0 gid gid2 gid1 gid0 lid lid2 lid1 lid0 wavefront work-group\n"); for (tid = 0; tid < ndrange->work_item_count; tid++) { work_item = ndrange->work_items[tid]; wavefront = work_item->wavefront; work_group = work_item->work_group; si_isa_debug("%4d %4d %4d %4d ", work_item->id, work_item->id_3d[2], work_item->id_3d[1], work_item->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_group->id, work_group->id_3d[2], work_group->id_3d[1], work_group->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_item->id_in_work_group, work_item->id_in_work_group_3d[2], work_item->id_in_work_group_3d[1], work_item->id_in_work_group_3d[0]); si_isa_debug("%20s.%-4d ", wavefront->name, work_item->id_in_wavefront); si_isa_debug("%20s.%-4d\n", work_group->name, work_item->id_in_work_group); } }