void si_ndrange_setup_work_items(struct si_ndrange_t *ndrange) { struct si_opencl_kernel_t *kernel = ndrange->kernel; struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /* Array of work-groups */ ndrange->work_group_count = kernel->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = calloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < kernel->group_count; gid++) { ndrange->work_groups[gid] = si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (kernel->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = calloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = calloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = ndrange->scalar_work_items[wid]; ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } /* Array of work-items */ ndrange->work_item_count = kernel->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = calloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < kernel->group_count3[2]; gidz++) { for (gidy = 0; gidy < kernel->group_count3[1]; gidy++) { for (gidx = 0; gidx < kernel->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange = ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + kernel->local_size; work_group->work_item_count = kernel->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < kernel->local_size3[2]; lidz++) { for (lidy = 0; lidy < kernel->local_size3[1]; lidy++) { for (lidx = 0; lidx < kernel->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < ndrange->wavefront_count); wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * kernel->local_size3[0] + lidx; work_item->id_3d[1] = gidy * kernel->local_size3[1] + lidy; work_item->id_3d[2] = gidz * kernel->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; /* Save local IDs in registers */ work_item->vreg[0].as_int = lidx; /* V0 */ work_item->vreg[1].as_int = lidy; /* V1 */ work_item->vreg[2].as_int = lidz; /* V2 */ /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); /* Initialize wavefront program counter */ if (!kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size) fatal("%s: cannot load kernel code", __FUNCTION__); wavefront->inst_buf_start = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; wavefront->inst_buf = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; /* Save work-group IDs in registers */ unsigned int user_sgpr = kernel->bin_file->enc_dict_entry_southern_islands->compute_pgm_rsrc2->user_sgpr; wavefront->sreg[user_sgpr].as_int = wavefront->work_group->id_3d[0]; wavefront->sreg[user_sgpr + 1].as_int = wavefront->work_group->id_3d[1]; wavefront->sreg[user_sgpr + 2].as_int = wavefront->work_group->id_3d[2]; /* Initialize Constant Buffers */ unsigned int userElementCount = kernel->bin_file->enc_dict_entry_southern_islands->userElementCount; struct si_bin_enc_user_element_t* userElements = kernel->bin_file->enc_dict_entry_southern_islands->userElements; for (int i = 0; i < userElementCount; i++) { if (userElements[i].dataClass == IMM_CONST_BUFFER) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == IMM_UAV) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == PTR_CONST_BUFFER_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else if (userElements[i].dataClass == PTR_UAV_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else { fatal("Unimplemented User Element: dataClass:%d", userElements[i].dataClass); } } /* Initialize the execution mask */ wavefront->sreg[SI_EXEC].as_int = 0xFFFFFFFF; wavefront->sreg[SI_EXEC + 1].as_int = 0xFFFFFFFF; wavefront->sreg[SI_EXECZ].as_int = 0; } /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", kernel->local_size, kernel->local_size3[0], kernel->local_size3[1], kernel->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", kernel->global_size, kernel->global_size3[0], kernel->global_size3[1], kernel->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", kernel->group_count, kernel->group_count3[0], kernel->group_count3[1], kernel->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug(" tid tid2 tid1 tid0 gid gid2 gid1 gid0 lid lid2 lid1 lid0 wavefront work-group\n"); for (tid = 0; tid < ndrange->work_item_count; tid++) { work_item = ndrange->work_items[tid]; wavefront = work_item->wavefront; work_group = work_item->work_group; si_isa_debug("%4d %4d %4d %4d ", work_item->id, work_item->id_3d[2], work_item->id_3d[1], work_item->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_group->id, work_group->id_3d[2], work_group->id_3d[1], work_group->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_item->id_in_work_group, work_item->id_in_work_group_3d[2], work_item->id_in_work_group_3d[1], work_item->id_in_work_group_3d[0]); si_isa_debug("%20s.%-4d ", wavefront->name, work_item->id_in_wavefront); si_isa_debug("%20s.%-4d\n", work_group->name, work_item->id_in_work_group); } }
static void si_ndrange_setup_arrays(struct si_ndrange_t *ndrange) { struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /* Array of work-groups */ ndrange->work_group_count = ndrange->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = xcalloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < ndrange->group_count; gid++) { ndrange->work_groups[gid] = si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (ndrange->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = xcalloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = xcalloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = ndrange->scalar_work_items[wid]; ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } /* Array of work-items */ ndrange->work_item_count = ndrange->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = xcalloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < ndrange->group_count3[2]; gidz++) { for (gidy = 0; gidy < ndrange->group_count3[1]; gidy++) { for (gidx = 0; gidx < ndrange->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange = ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + ndrange->local_size; work_group->work_item_count = ndrange->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < ndrange->local_size3[2]; lidz++) { for (lidy = 0; lidy < ndrange->local_size3[1]; lidy++) { for (lidx = 0; lidx < ndrange->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < ndrange->wavefront_count); wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * ndrange->local_size3[0] + lidx; work_item->id_3d[1] = gidy * ndrange->local_size3[1] + lidy; work_item->id_3d[2] = gidz * ndrange->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /*MIAOW start*/ work_item->id = work_item->id_in_wavefront; /*MIAOW stop*/ /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); } /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", ndrange->local_size, ndrange->local_size3[0], ndrange->local_size3[1], ndrange->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", ndrange->global_size, ndrange->global_size3[0], ndrange->global_size3[1], ndrange->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", ndrange->group_count, ndrange->group_count3[0], ndrange->group_count3[1], ndrange->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug("\n"); }
void si_ndrange_setup_work_items(struct si_ndrange_t *ndrange) { struct si_opencl_kernel_t *kernel = ndrange->kernel; struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /*MIAOW start */ char config_str[100]; sprintf(config_str, "config_%d.txt", kernel_config_count); FILE* config = fopen(config_str, "w"); /*MIAOW stop */ /*MIAOW start*/ //UNIT TEST char unit_test_input_buf[150000]; char *tok = NULL; char *config_read_result = NULL; char vreg_str[64][2500]; char sreg_str[2500]; FILE* unit_test_config = fopen("unit_test_config.txt", "r"); if (unit_test_config != 0) { int i; int num_of_threads = 0; //ndrange->wavefront_count = 1; //kernel->group_count = 1; kernel->local_size3[2] = 1; kernel->local_size3[1] = 1; kernel->global_size3[2] = 1; kernel->global_size3[1] = 1; config_read_result = fgets(unit_test_input_buf, 150000, unit_test_config); if(config_read_result != NULL) { tok = strtok(unit_test_input_buf, ";"); //WG count kernel->group_count = atoi(tok); tok = strtok(NULL, ";"); //total number of threads num_of_threads = atoi(tok); kernel->global_size = atoi(tok); kernel->global_size3[0] = atoi(tok); kernel->local_size3[0] = atoi(tok); kernel->local_size = atoi(tok); } } //WorkGroup count and thread count fprintf(config,"%d;%d;\n", kernel->group_count, kernel->global_size); #ifdef MIAOW_DEBUG fflush(config); #endif /*MIAOW stop*/ /* Array of work-groups */ ndrange->work_group_count = kernel->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = xcalloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < kernel->group_count; gid++) { ndrange->work_groups[gid] = si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (kernel->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = xcalloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = xcalloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = ndrange->scalar_work_items[wid]; ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } #ifdef MIAOW_DEBUG fprintf(config, "Processing Workitems\n"); fflush(config); #endif /* Array of work-items */ ndrange->work_item_count = kernel->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = xcalloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < kernel->group_count3[2]; gidz++) { for (gidy = 0; gidy < kernel->group_count3[1]; gidy++) { for (gidx = 0; gidx < kernel->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange = ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + kernel->local_size; work_group->work_item_count = kernel->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < kernel->local_size3[2]; lidz++) { for (lidy = 0; lidy < kernel->local_size3[1]; lidy++) { for (lidx = 0; lidx < kernel->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < ndrange->wavefront_count); wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * kernel->local_size3[0] + lidx; work_item->id_3d[1] = gidy * kernel->local_size3[1] + lidy; work_item->id_3d[2] = gidz * kernel->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /*MIAOW start*/ work_item->id = work_item->id_in_wavefront; /*MIAOW stop*/ /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; //Initializing all vreg values to zero, so that config.txt doesnt change with each run /*MIAOW start*/ for (int vreg_init_index; vreg_init_index < 256; vreg_init_index++) { work_item->vreg[vreg_init_index].as_int = 0; } /*MIAOW stop*/ /* Save local IDs in registers */ work_item->vreg[0].as_int = lidx; /* V0 */ work_item->vreg[1].as_int = lidy; /* V1 */ work_item->vreg[2].as_int = lidz; /* V2 */ /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /*MIAOW start */ //This part is for unit test trace generation. //If the file unit_test_instr.mem is present, the contents will be read and placed in the instruction buffer. FILE* unit_test_instr = fopen("unit_test_instr.mem", "r"); if (unit_test_instr != 0) { unsigned char instr_buf[200]; int input_instr_count = 0; fgets(instr_buf, 200, unit_test_instr); //address unsigned char* buf_ptr = (unsigned char*)kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; while (fgets(instr_buf, 200, unit_test_instr) != NULL) { instr_buf[2] = '\0'; //interested only in first byte. unsigned char cur_instr = (unsigned char)strtol(instr_buf, 0, 16); buf_ptr[input_instr_count++] = cur_instr; } kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size = input_instr_count; fclose(unit_test_instr); } /*MIAOW stop */ /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); /* Initialize wavefront program counter */ if (!kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size) fatal("%s: cannot load kernel code", __FUNCTION__); wavefront->wavefront_pool_start = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; wavefront->wavefront_pool = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; //Initializing all sreg values to zero, so that config.txt doesnt change with each run /*MIAOW start*/ for (int sreg_init_index; sreg_init_index < 256; sreg_init_index++) { //wavefront->sreg[sreg_init_index].as_int = 0; } /*MIAOW stop*/ /* Save work-group IDs in registers */ unsigned int user_sgpr = kernel->bin_file-> enc_dict_entry_southern_islands->compute_pgm_rsrc2->user_sgpr; wavefront->sreg[user_sgpr].as_int = wavefront->work_group->id_3d[0]; wavefront->sreg[user_sgpr + 1].as_int = wavefront->work_group->id_3d[1]; wavefront->sreg[user_sgpr + 2].as_int = wavefront->work_group->id_3d[2]; /* Initialize Constant Buffers */ unsigned int userElementCount = kernel->bin_file->enc_dict_entry_southern_islands->userElementCount; struct si_bin_enc_user_element_t* userElements = kernel->bin_file->enc_dict_entry_southern_islands->userElements; for (int i = 0; i < userElementCount; i++) { if (userElements[i].dataClass == IMM_CONST_BUFFER) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == IMM_UAV) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == PTR_CONST_BUFFER_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else if (userElements[i].dataClass == PTR_UAV_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else { fatal("Unimplemented User Element: dataClass:%d", userElements[i].dataClass); } } //MIAOW m2s is not setting exec mask properly /* Initialize the execution mask */ //wavefront->sreg[SI_EXEC].as_int = 0xFFFFFFFF; //wavefront->sreg[SI_EXEC + 1].as_int = 0xFFFFFFFF; //wavefront->sreg[SI_EXECZ].as_int = 0; /*MIAOW start*/ //EXEC Mask init unsigned long long mask; if(wavefront->work_item_count == 64) { mask = 0xFFFFFFFFFFFFFFFF; } else { mask = powl(2, wavefront->work_item_count) - 1; } wavefront->sreg[SI_EXEC].as_uint = (unsigned int)mask; wavefront->sreg[SI_EXEC + 1].as_uint = mask>>32; wavefront->sreg[SI_EXECZ].as_int = 0; /*MIAOW stop*/ /*MIAOW start*/ if(config_read_result != NULL) { if(NULL != fgets(unit_test_input_buf, 150000, unit_test_config)) { int num_of_threads = 0; int thread_init_count = 0; tok = strtok(unit_test_input_buf, ";"); //WGID tok = strtok(NULL, ";"); //WFID tok = strtok(NULL, ";"); //WF count tok = strtok(NULL, ";"); //thread count num_of_threads = atoi(tok); #ifdef MIAOW_DEBUG if (num_of_threads != wavefront->work_item_count) { fprintf(config, "num_thread MISMATCH %d!=%d\n", num_of_threads, wavefront->work_item_count); } else { fprintf(config, "num_thread match %d=%d\n", num_of_threads, wavefront->work_item_count); } fflush(config); #endif tok = strtok(NULL, ";"); //VREG size kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used = atoi(tok); tok = strtok(NULL, ";"); //SREG size kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used = atoi(tok); tok = strtok(NULL, ";"); //LDS size kernel->bin_file->enc_dict_entry_southern_islands->lds_size_used = atoi(tok); for(thread_init_count = 0; thread_init_count < num_of_threads; thread_init_count++) { tok = strtok(NULL, ";"); strcpy((char*)vreg_str[thread_init_count], tok); assert(vreg_str[thread_init_count][0] == 'V'); } tok = strtok(NULL, ";"); strcpy((char*)sreg_str, tok); assert(sreg_str[0] == 'S'); tok = strtok(NULL, ";"); //PC } #ifdef MIAOW_DEBUG fprintf(config, "Initializing VREG \n"); fflush(config); #endif //VREG value init int wi_init_count = 0; for (wi_init_count = 0; wi_init_count < wavefront->work_item_count; wi_init_count++) { if (wavefront->work_items != NULL) { int vreg_init_count = 0; char *reg_tok; struct si_work_item_t* wi = wavefront->work_items[wi_init_count]; reg_tok = strtok(vreg_str[wi_init_count], ":"); reg_tok = strtok(NULL, "="); for(vreg_init_count = 0; reg_tok != NULL; vreg_init_count++) { int vreg_index = atoi(reg_tok); reg_tok = strtok(NULL, ","); assert(reg_tok != NULL); wi->vreg[vreg_index].as_int = atoi(reg_tok); reg_tok = strtok(NULL, "="); } // make sure that all reg values were read assert(reg_tok == NULL); } } #ifdef MIAOW_DEBUG fprintf(config, "Initializing SREG \n"); fflush(config); #endif #ifdef MIAOW_DEBUG fprintf(config, "mask: %lld \n", mask); fprintf(config, "MASK HI: %u \n", wavefront->sreg[SI_EXEC + 1].as_uint); fprintf(config, "MASK LO: %u \n", wavefront->sreg[SI_EXEC].as_uint); fflush(config); #endif //SREG value init int sreg_init_count = 0; char *sreg_tok; sreg_tok = strtok(sreg_str, ":"); sreg_tok = strtok(NULL, "="); for(sreg_init_count=0; sreg_tok != NULL; sreg_init_count++) { int sreg_index = atoi(sreg_tok); sreg_tok = strtok(NULL, ","); assert(sreg_tok != NULL); wavefront->sreg[sreg_index].as_int = atoi(sreg_tok); sreg_tok = strtok(NULL, "="); } // make sure that all reg values were read assert(sreg_tok == NULL); } /*MIAOW stop*/ /*MIAOW start*/ //WorkGroup ID fprintf(config,"%d;",wavefront->work_group->id); //Wavefront ID fprintf(config,"%d;",wavefront->id_in_work_group); //Wavefront Count fprintf(config,"%d;",wavefront->work_group->wavefront_count); //Thread count fprintf(config,"%d;",wavefront->work_item_count); //VGPR size, SGPR size, LDS size fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used); fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used); fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->lds_size_used); #ifdef MIAOW_DEBUG fflush(config); #endif int wi_count = 0; for (wi_count = 0; wi_count < wavefront->work_item_count; wi_count++) { //VGPR initial values if (wavefront->work_items != NULL) { struct si_work_item_t* wi = wavefront->work_items[wi_count]; fprintf(config,"V:"); int vgpr_count = 0; for (vgpr_count = 0; vgpr_count < (kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used - 1); vgpr_count++) { //All VGPR values except the last fprintf(config,"%d=%d,", vgpr_count, wi->vreg[vgpr_count]); } //Last SGPR value fprintf(config,"%d=%d;", vgpr_count, wi->vreg[vgpr_count]); } } //SGPR initial values fprintf(config,"S:"); int sgpr_count = 0; for (sgpr_count = 0; sgpr_count < (kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used - 1); sgpr_count++) { //All SGPR values except the last fprintf(config,"%d=%d,", sgpr_count, wavefront->sreg[sgpr_count]); } //Last SGPR value fprintf(config,"%d=%d;", sgpr_count, wavefront->sreg[sgpr_count]); //PC start //fprintf(config,"%d",wavefront->wavefront_pool_start); fprintf(config, "0"); fprintf(config,"\n"); /*MIAOW stop*/ } /*MIAOW start */ fclose(config); char instr_str[100]; sprintf(instr_str, "instr_%d.mem", kernel_config_count); FILE* instr = fopen(instr_str, "w"); //fprintf(instr, "@%.8x\n", kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr); fprintf(instr, "@0\n"); for (int instr_count = 0; instr_count < kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size; instr_count++) { fprintf(instr, "%.2x\n", ((unsigned char*)kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr)[instr_count]); } fclose(instr); /*MIAOW stop */ /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", kernel->local_size, kernel->local_size3[0], kernel->local_size3[1], kernel->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", kernel->global_size, kernel->global_size3[0], kernel->global_size3[1], kernel->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", kernel->group_count, kernel->group_count3[0], kernel->group_count3[1], kernel->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug(" tid tid2 tid1 tid0 gid gid2 gid1 gid0 lid lid2 lid1 lid0 wavefront work-group\n"); for (tid = 0; tid < ndrange->work_item_count; tid++) { work_item = ndrange->work_items[tid]; wavefront = work_item->wavefront; work_group = work_item->work_group; si_isa_debug("%4d %4d %4d %4d ", work_item->id, work_item->id_3d[2], work_item->id_3d[1], work_item->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_group->id, work_group->id_3d[2], work_group->id_3d[1], work_group->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_item->id_in_work_group, work_item->id_in_work_group_3d[2], work_item->id_in_work_group_3d[1], work_item->id_in_work_group_3d[0]); si_isa_debug("%20s.%-4d ", wavefront->name, work_item->id_in_wavefront); si_isa_debug("%20s.%-4d\n", work_group->name, work_item->id_in_work_group); } }
struct si_work_group_t *si_work_group_create(unsigned int work_group_id, struct si_ndrange_t *ndrange) { struct si_bin_enc_user_element_t *user_elements; struct si_wavefront_t *wavefront; struct si_work_group_t *work_group; struct si_work_item_t *work_item; int i; int lid; int lidx, lidy, lidz; int tid; int user_element_count; int wavefront_id; int wavefront_offset; int work_item_id; int work_item_gidx_start; int work_item_gidy_start; int work_item_gidz_start; /* Number of in work-items in work-group */ unsigned int work_items_per_group = ndrange->local_size3[0] * ndrange->local_size3[1] * ndrange->local_size3[2]; assert(work_items_per_group > 0); /* Number of wavefronts in work-group */ unsigned int wavefronts_per_group = (work_items_per_group + (si_emu_wavefront_size - 1)) / si_emu_wavefront_size; assert(wavefronts_per_group > 0); /* Initialize */ work_group = xcalloc(1, sizeof(struct si_work_group_t)); work_group->id = work_group_id; work_group->ndrange = ndrange; /* Create LDS */ work_group->lds_module = mem_create(); work_group->lds_module->safe = 0; /* Allocate pointers for work-items (will actually be created when * wavefronts are created) */ work_group->work_items = xcalloc(si_emu_wavefront_size * wavefronts_per_group, sizeof(void *)); work_group->wavefronts = xcalloc(wavefronts_per_group, sizeof(void *)); work_group->wavefront_count = wavefronts_per_group; /* Allocate wavefronts and work-items */ SI_FOREACH_WAVEFRONT_IN_WORK_GROUP(work_group, wavefront_id) { work_group->wavefronts[wavefront_id] = si_wavefront_create( work_group->id * wavefronts_per_group + wavefront_id, work_group); wavefront = work_group->wavefronts[wavefront_id]; SI_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { wavefront_offset = wavefront_id * si_emu_wavefront_size; work_group->work_items[wavefront_offset+work_item_id] = wavefront->work_items[work_item_id]; work_group->work_items[wavefront_offset+work_item_id]-> work_group = work_group; }
struct si_work_group_t *si_work_group_create(unsigned int work_group_id, struct si_ndrange_t *ndrange) { struct si_bin_enc_user_element_t *user_elements; struct si_wavefront_t *wavefront; struct si_work_group_t *work_group; struct si_work_item_t *work_item; /*MIAOW Start */ struct opencl_si_kernel_t *kernel = ndrange->kernel; unsigned long long mask = 0x00000000; /*MIAOW Stop */ int i; int lid; int lidx, lidy, lidz; int tid; int user_element_count; int wavefront_id; int wavefront_offset; int work_item_id; int work_item_gidx_start; int work_item_gidy_start; int work_item_gidz_start; /*MIAOW start*/ //UNIT TEST char unit_test_input_buf[150000]; char *tok = NULL; char *config_read_result = NULL; char vreg_str[64][2500]; char sreg_str[2500]; int num_of_threads = 0; FILE* unit_test_config = fopen("unit_test_config.txt", "r"); if (unit_test_config != 0) { ndrange->local_size3[2] = 1; ndrange->local_size3[1] = 1; ndrange->global_size3[2] = 1; ndrange->global_size3[1] = 1; config_read_result = fgets(unit_test_input_buf, 150000, unit_test_config); if(config_read_result != NULL) { tok = strtok(unit_test_input_buf, ";"); //WG count ndrange->group_count = atoi(tok); tok = strtok(NULL, ";"); //total number of threads num_of_threads = atoi(tok); ndrange->global_size = atoi(tok); ndrange->global_size3[0] = atoi(tok); ndrange->local_size3[0] = atoi(tok); ndrange->local_size = atoi(tok); } } //WorkGroup count and thread count //fprintf(config,"%d;%d;\n", ndrange->group_count, ndrange->global_size); #ifdef MIAOW_DEBUG fflush(config); #endif /*MIAOW stop*/ /* Number of in work-items in work-group */ unsigned int work_items_per_group = ndrange->local_size3[0] * ndrange->local_size3[1] * ndrange->local_size3[2]; assert(work_items_per_group > 0); /* Number of wavefronts in work-group */ unsigned int wavefronts_per_group = (work_items_per_group + (si_emu_wavefront_size - 1)) / si_emu_wavefront_size; assert(wavefronts_per_group > 0); /* Initialize */ work_group = xcalloc(1, sizeof(struct si_work_group_t)); work_group->id = work_group_id; work_group->ndrange = ndrange; /* Create LDS */ work_group->lds_module = mem_create(); work_group->lds_module->safe = 0; /* Allocate pointers for work-items (will actually be created when * wavefronts are created) */ work_group->work_items = xcalloc(si_emu_wavefront_size * wavefronts_per_group, sizeof(void *)); work_group->wavefronts = xcalloc(wavefronts_per_group, sizeof(void *)); work_group->wavefront_count = wavefronts_per_group; /* Allocate wavefronts and work-items */ SI_FOREACH_WAVEFRONT_IN_WORK_GROUP(work_group, wavefront_id) { work_group->wavefronts[wavefront_id] = si_wavefront_create( work_group->id * wavefronts_per_group + wavefront_id, work_group); wavefront = work_group->wavefronts[wavefront_id]; SI_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { wavefront_offset = wavefront_id * si_emu_wavefront_size; work_group->work_items[wavefront_offset+work_item_id] = wavefront->work_items[work_item_id]; work_group->work_items[wavefront_offset+work_item_id]-> work_group = work_group; }