static void evg_tex_engine_fetch(struct evg_compute_unit_t *compute_unit) { struct linked_list_t *pending_queue = compute_unit->tex_engine.pending_queue; struct linked_list_t *finished_queue = compute_unit->tex_engine.finished_queue; struct evg_wavefront_t *wavefront; struct evg_uop_t *cf_uop, *uop; struct evg_work_item_uop_t *work_item_uop; struct evg_inst_t *inst; int inst_num; struct evg_work_item_t *work_item; int work_item_id; char str[MAX_LONG_STRING_SIZE]; char str_trimmed[MAX_LONG_STRING_SIZE]; /* Get wavefront to fetch from */ linked_list_head(pending_queue); cf_uop = linked_list_get(pending_queue); if (!cf_uop) return; wavefront = cf_uop->wavefront; assert(wavefront->clause_kind == EVG_CLAUSE_TEX); /* If fetch queue is full, cannot fetch until space is made */ if (compute_unit->tex_engine.fetch_queue_length >= evg_gpu_tex_engine_fetch_queue_size) return; /* Emulate instruction and create uop */ inst_num = (wavefront->clause_buf - wavefront->clause_buf_start) / 16; evg_wavefront_execute(wavefront); inst = &wavefront->tex_inst; uop = evg_uop_create(); uop->wavefront = wavefront; uop->work_group = wavefront->work_group; uop->cf_uop = cf_uop; uop->compute_unit = compute_unit; uop->id_in_compute_unit = compute_unit->gpu_uop_id_counter++; uop->last = wavefront->clause_kind != EVG_CLAUSE_TEX; uop->global_mem_read = wavefront->global_mem_read; uop->global_mem_write = wavefront->global_mem_write; uop->vliw_slots = 1; /* If TEX clause finished, extract CF uop from 'pending_queue' and * insert it into 'finished_queue'. */ if (uop->last) { linked_list_remove(pending_queue); linked_list_add(finished_queue, cf_uop); } /* If instruction is a global memory read (should be), record addresses */ if (uop->global_mem_read) { assert((inst->info->flags & EVG_INST_FLAG_MEM_READ)); EVG_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { work_item = evg_gpu->ndrange->work_items[work_item_id]; work_item_uop = &uop->work_item_uop[work_item->id_in_wavefront]; work_item_uop->global_mem_access_addr = work_item->global_mem_access_addr; work_item_uop->global_mem_access_size = work_item->global_mem_access_size; }
static void evg_cf_engine_fetch(struct evg_compute_unit_t *compute_unit) { struct evg_ndrange_t *ndrange = evg_gpu->ndrange; struct evg_wavefront_t *wavefront; char str[MAX_LONG_STRING_SIZE]; char str_trimmed[MAX_LONG_STRING_SIZE]; struct evg_inst_t *inst; struct evg_uop_t *uop; struct evg_work_item_uop_t *work_item_uop; struct evg_work_item_t *work_item; int work_item_id; /* Schedule wavefront */ wavefront = evg_compute_unit_schedule(compute_unit); if (!wavefront) return; /* Emulate CF instruction */ evg_wavefront_execute(wavefront); inst = &wavefront->cf_inst; /* Create uop */ uop = evg_uop_create(); uop->wavefront = wavefront; uop->work_group = wavefront->work_group; uop->compute_unit = compute_unit; uop->id_in_compute_unit = compute_unit->gpu_uop_id_counter++; uop->alu_clause_trigger = wavefront->clause_kind == EVG_CLAUSE_ALU; uop->tex_clause_trigger = wavefront->clause_kind == EVG_CLAUSE_TEX; uop->no_clause_trigger = wavefront->clause_kind == EVG_CLAUSE_CF; uop->last = DOUBLE_LINKED_LIST_MEMBER(wavefront->work_group, finished, wavefront); uop->wavefront_last = uop->last && uop->no_clause_trigger; uop->global_mem_read = wavefront->global_mem_read; uop->global_mem_write = wavefront->global_mem_write; uop->active_mask_update = wavefront->active_mask_update; uop->active_mask_push = wavefront->active_mask_push; uop->active_mask_pop = wavefront->active_mask_pop; uop->active_mask_stack_top = wavefront->stack_top; uop->vliw_slots = 1; /* If debugging active mask, store active state for work-items */ if (debug_status(evg_stack_debug_category)) evg_uop_save_active_mask(uop); /* If instruction is a global memory write, record addresses */ if (uop->global_mem_write) { assert((inst->info->flags & EVG_INST_FLAG_MEM_WRITE)); EVG_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { work_item = ndrange->work_items[work_item_id]; work_item_uop = &uop->work_item_uop[work_item->id_in_wavefront]; work_item_uop->global_mem_access_addr = work_item->global_mem_access_addr; work_item_uop->global_mem_access_size = work_item->global_mem_access_size; }