void si_ndrange_setup_work_items(struct si_ndrange_t *ndrange) { struct si_opencl_kernel_t *kernel = ndrange->kernel; struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /* Array of work-groups */ ndrange->work_group_count = kernel->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = calloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < kernel->group_count; gid++) { ndrange->work_groups[gid] = si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (kernel->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = calloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = calloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = ndrange->scalar_work_items[wid]; ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } /* Array of work-items */ ndrange->work_item_count = kernel->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = calloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < kernel->group_count3[2]; gidz++) { for (gidy = 0; gidy < kernel->group_count3[1]; gidy++) { for (gidx = 0; gidx < kernel->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange = ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + kernel->local_size; work_group->work_item_count = kernel->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < kernel->local_size3[2]; lidz++) { for (lidy = 0; lidy < kernel->local_size3[1]; lidy++) { for (lidx = 0; lidx < kernel->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < ndrange->wavefront_count); wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * kernel->local_size3[0] + lidx; work_item->id_3d[1] = gidy * kernel->local_size3[1] + lidy; work_item->id_3d[2] = gidz * kernel->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; /* Save local IDs in registers */ work_item->vreg[0].as_int = lidx; /* V0 */ work_item->vreg[1].as_int = lidy; /* V1 */ work_item->vreg[2].as_int = lidz; /* V2 */ /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); /* Initialize wavefront program counter */ if (!kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size) fatal("%s: cannot load kernel code", __FUNCTION__); wavefront->inst_buf_start = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; wavefront->inst_buf = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; /* Save work-group IDs in registers */ unsigned int user_sgpr = kernel->bin_file->enc_dict_entry_southern_islands->compute_pgm_rsrc2->user_sgpr; wavefront->sreg[user_sgpr].as_int = wavefront->work_group->id_3d[0]; wavefront->sreg[user_sgpr + 1].as_int = wavefront->work_group->id_3d[1]; wavefront->sreg[user_sgpr + 2].as_int = wavefront->work_group->id_3d[2]; /* Initialize Constant Buffers */ unsigned int userElementCount = kernel->bin_file->enc_dict_entry_southern_islands->userElementCount; struct si_bin_enc_user_element_t* userElements = kernel->bin_file->enc_dict_entry_southern_islands->userElements; for (int i = 0; i < userElementCount; i++) { if (userElements[i].dataClass == IMM_CONST_BUFFER) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == IMM_UAV) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == PTR_CONST_BUFFER_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else if (userElements[i].dataClass == PTR_UAV_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else { fatal("Unimplemented User Element: dataClass:%d", userElements[i].dataClass); } } /* Initialize the execution mask */ wavefront->sreg[SI_EXEC].as_int = 0xFFFFFFFF; wavefront->sreg[SI_EXEC + 1].as_int = 0xFFFFFFFF; wavefront->sreg[SI_EXECZ].as_int = 0; } /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", kernel->local_size, kernel->local_size3[0], kernel->local_size3[1], kernel->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", kernel->global_size, kernel->global_size3[0], kernel->global_size3[1], kernel->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", kernel->group_count, kernel->group_count3[0], kernel->group_count3[1], kernel->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug(" tid tid2 tid1 tid0 gid gid2 gid1 gid0 lid lid2 lid1 lid0 wavefront work-group\n"); for (tid = 0; tid < ndrange->work_item_count; tid++) { work_item = ndrange->work_items[tid]; wavefront = work_item->wavefront; work_group = work_item->work_group; si_isa_debug("%4d %4d %4d %4d ", work_item->id, work_item->id_3d[2], work_item->id_3d[1], work_item->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_group->id, work_group->id_3d[2], work_group->id_3d[1], work_group->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_item->id_in_work_group, work_item->id_in_work_group_3d[2], work_item->id_in_work_group_3d[1], work_item->id_in_work_group_3d[0]); si_isa_debug("%20s.%-4d ", wavefront->name, work_item->id_in_wavefront); si_isa_debug("%20s.%-4d\n", work_group->name, work_item->id_in_work_group); } }
void si_ndrange_setup_work_items(struct si_ndrange_t *ndrange) { struct si_opencl_kernel_t *kernel = ndrange->kernel; struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /*MIAOW start */ char config_str[100]; sprintf(config_str, "config_%d.txt", kernel_config_count); FILE* config = fopen(config_str, "w"); /*MIAOW stop */ /*MIAOW start*/ //UNIT TEST char unit_test_input_buf[150000]; char *tok = NULL; char *config_read_result = NULL; char vreg_str[64][2500]; char sreg_str[2500]; FILE* unit_test_config = fopen("unit_test_config.txt", "r"); if (unit_test_config != 0) { int i; int num_of_threads = 0; //ndrange->wavefront_count = 1; //kernel->group_count = 1; kernel->local_size3[2] = 1; kernel->local_size3[1] = 1; kernel->global_size3[2] = 1; kernel->global_size3[1] = 1; config_read_result = fgets(unit_test_input_buf, 150000, unit_test_config); if(config_read_result != NULL) { tok = strtok(unit_test_input_buf, ";"); //WG count kernel->group_count = atoi(tok); tok = strtok(NULL, ";"); //total number of threads num_of_threads = atoi(tok); kernel->global_size = atoi(tok); kernel->global_size3[0] = atoi(tok); kernel->local_size3[0] = atoi(tok); kernel->local_size = atoi(tok); } } //WorkGroup count and thread count fprintf(config,"%d;%d;\n", kernel->group_count, kernel->global_size); #ifdef MIAOW_DEBUG fflush(config); #endif /*MIAOW stop*/ /* Array of work-groups */ ndrange->work_group_count = kernel->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = xcalloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < kernel->group_count; gid++) { ndrange->work_groups[gid] = si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (kernel->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = xcalloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = xcalloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = ndrange->scalar_work_items[wid]; ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } #ifdef MIAOW_DEBUG fprintf(config, "Processing Workitems\n"); fflush(config); #endif /* Array of work-items */ ndrange->work_item_count = kernel->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = xcalloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < kernel->group_count3[2]; gidz++) { for (gidy = 0; gidy < kernel->group_count3[1]; gidy++) { for (gidx = 0; gidx < kernel->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange = ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + kernel->local_size; work_group->work_item_count = kernel->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < kernel->local_size3[2]; lidz++) { for (lidy = 0; lidy < kernel->local_size3[1]; lidy++) { for (lidx = 0; lidx < kernel->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < ndrange->wavefront_count); wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * kernel->local_size3[0] + lidx; work_item->id_3d[1] = gidy * kernel->local_size3[1] + lidy; work_item->id_3d[2] = gidz * kernel->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /*MIAOW start*/ work_item->id = work_item->id_in_wavefront; /*MIAOW stop*/ /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; //Initializing all vreg values to zero, so that config.txt doesnt change with each run /*MIAOW start*/ for (int vreg_init_index; vreg_init_index < 256; vreg_init_index++) { work_item->vreg[vreg_init_index].as_int = 0; } /*MIAOW stop*/ /* Save local IDs in registers */ work_item->vreg[0].as_int = lidx; /* V0 */ work_item->vreg[1].as_int = lidy; /* V1 */ work_item->vreg[2].as_int = lidz; /* V2 */ /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /*MIAOW start */ //This part is for unit test trace generation. //If the file unit_test_instr.mem is present, the contents will be read and placed in the instruction buffer. FILE* unit_test_instr = fopen("unit_test_instr.mem", "r"); if (unit_test_instr != 0) { unsigned char instr_buf[200]; int input_instr_count = 0; fgets(instr_buf, 200, unit_test_instr); //address unsigned char* buf_ptr = (unsigned char*)kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; while (fgets(instr_buf, 200, unit_test_instr) != NULL) { instr_buf[2] = '\0'; //interested only in first byte. unsigned char cur_instr = (unsigned char)strtol(instr_buf, 0, 16); buf_ptr[input_instr_count++] = cur_instr; } kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size = input_instr_count; fclose(unit_test_instr); } /*MIAOW stop */ /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); /* Initialize wavefront program counter */ if (!kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size) fatal("%s: cannot load kernel code", __FUNCTION__); wavefront->wavefront_pool_start = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; wavefront->wavefront_pool = kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr; //Initializing all sreg values to zero, so that config.txt doesnt change with each run /*MIAOW start*/ for (int sreg_init_index; sreg_init_index < 256; sreg_init_index++) { //wavefront->sreg[sreg_init_index].as_int = 0; } /*MIAOW stop*/ /* Save work-group IDs in registers */ unsigned int user_sgpr = kernel->bin_file-> enc_dict_entry_southern_islands->compute_pgm_rsrc2->user_sgpr; wavefront->sreg[user_sgpr].as_int = wavefront->work_group->id_3d[0]; wavefront->sreg[user_sgpr + 1].as_int = wavefront->work_group->id_3d[1]; wavefront->sreg[user_sgpr + 2].as_int = wavefront->work_group->id_3d[2]; /* Initialize Constant Buffers */ unsigned int userElementCount = kernel->bin_file->enc_dict_entry_southern_islands->userElementCount; struct si_bin_enc_user_element_t* userElements = kernel->bin_file->enc_dict_entry_southern_islands->userElements; for (int i = 0; i < userElementCount; i++) { if (userElements[i].dataClass == IMM_CONST_BUFFER) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == IMM_UAV) { si_wavefront_init_sreg_with_cb(wavefront, userElements[i].startUserReg, userElements[i].userRegCount, userElements[i].apiSlot); } else if (userElements[i].dataClass == PTR_CONST_BUFFER_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else if (userElements[i].dataClass == PTR_UAV_TABLE) { si_wavefront_init_sreg_with_uav_table(wavefront, userElements[i].startUserReg, userElements[i].userRegCount); } else { fatal("Unimplemented User Element: dataClass:%d", userElements[i].dataClass); } } //MIAOW m2s is not setting exec mask properly /* Initialize the execution mask */ //wavefront->sreg[SI_EXEC].as_int = 0xFFFFFFFF; //wavefront->sreg[SI_EXEC + 1].as_int = 0xFFFFFFFF; //wavefront->sreg[SI_EXECZ].as_int = 0; /*MIAOW start*/ //EXEC Mask init unsigned long long mask; if(wavefront->work_item_count == 64) { mask = 0xFFFFFFFFFFFFFFFF; } else { mask = powl(2, wavefront->work_item_count) - 1; } wavefront->sreg[SI_EXEC].as_uint = (unsigned int)mask; wavefront->sreg[SI_EXEC + 1].as_uint = mask>>32; wavefront->sreg[SI_EXECZ].as_int = 0; /*MIAOW stop*/ /*MIAOW start*/ if(config_read_result != NULL) { if(NULL != fgets(unit_test_input_buf, 150000, unit_test_config)) { int num_of_threads = 0; int thread_init_count = 0; tok = strtok(unit_test_input_buf, ";"); //WGID tok = strtok(NULL, ";"); //WFID tok = strtok(NULL, ";"); //WF count tok = strtok(NULL, ";"); //thread count num_of_threads = atoi(tok); #ifdef MIAOW_DEBUG if (num_of_threads != wavefront->work_item_count) { fprintf(config, "num_thread MISMATCH %d!=%d\n", num_of_threads, wavefront->work_item_count); } else { fprintf(config, "num_thread match %d=%d\n", num_of_threads, wavefront->work_item_count); } fflush(config); #endif tok = strtok(NULL, ";"); //VREG size kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used = atoi(tok); tok = strtok(NULL, ";"); //SREG size kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used = atoi(tok); tok = strtok(NULL, ";"); //LDS size kernel->bin_file->enc_dict_entry_southern_islands->lds_size_used = atoi(tok); for(thread_init_count = 0; thread_init_count < num_of_threads; thread_init_count++) { tok = strtok(NULL, ";"); strcpy((char*)vreg_str[thread_init_count], tok); assert(vreg_str[thread_init_count][0] == 'V'); } tok = strtok(NULL, ";"); strcpy((char*)sreg_str, tok); assert(sreg_str[0] == 'S'); tok = strtok(NULL, ";"); //PC } #ifdef MIAOW_DEBUG fprintf(config, "Initializing VREG \n"); fflush(config); #endif //VREG value init int wi_init_count = 0; for (wi_init_count = 0; wi_init_count < wavefront->work_item_count; wi_init_count++) { if (wavefront->work_items != NULL) { int vreg_init_count = 0; char *reg_tok; struct si_work_item_t* wi = wavefront->work_items[wi_init_count]; reg_tok = strtok(vreg_str[wi_init_count], ":"); reg_tok = strtok(NULL, "="); for(vreg_init_count = 0; reg_tok != NULL; vreg_init_count++) { int vreg_index = atoi(reg_tok); reg_tok = strtok(NULL, ","); assert(reg_tok != NULL); wi->vreg[vreg_index].as_int = atoi(reg_tok); reg_tok = strtok(NULL, "="); } // make sure that all reg values were read assert(reg_tok == NULL); } } #ifdef MIAOW_DEBUG fprintf(config, "Initializing SREG \n"); fflush(config); #endif #ifdef MIAOW_DEBUG fprintf(config, "mask: %lld \n", mask); fprintf(config, "MASK HI: %u \n", wavefront->sreg[SI_EXEC + 1].as_uint); fprintf(config, "MASK LO: %u \n", wavefront->sreg[SI_EXEC].as_uint); fflush(config); #endif //SREG value init int sreg_init_count = 0; char *sreg_tok; sreg_tok = strtok(sreg_str, ":"); sreg_tok = strtok(NULL, "="); for(sreg_init_count=0; sreg_tok != NULL; sreg_init_count++) { int sreg_index = atoi(sreg_tok); sreg_tok = strtok(NULL, ","); assert(sreg_tok != NULL); wavefront->sreg[sreg_index].as_int = atoi(sreg_tok); sreg_tok = strtok(NULL, "="); } // make sure that all reg values were read assert(sreg_tok == NULL); } /*MIAOW stop*/ /*MIAOW start*/ //WorkGroup ID fprintf(config,"%d;",wavefront->work_group->id); //Wavefront ID fprintf(config,"%d;",wavefront->id_in_work_group); //Wavefront Count fprintf(config,"%d;",wavefront->work_group->wavefront_count); //Thread count fprintf(config,"%d;",wavefront->work_item_count); //VGPR size, SGPR size, LDS size fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used); fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used); fprintf(config,"%d;",kernel->bin_file->enc_dict_entry_southern_islands->lds_size_used); #ifdef MIAOW_DEBUG fflush(config); #endif int wi_count = 0; for (wi_count = 0; wi_count < wavefront->work_item_count; wi_count++) { //VGPR initial values if (wavefront->work_items != NULL) { struct si_work_item_t* wi = wavefront->work_items[wi_count]; fprintf(config,"V:"); int vgpr_count = 0; for (vgpr_count = 0; vgpr_count < (kernel->bin_file->enc_dict_entry_southern_islands->num_vgpr_used - 1); vgpr_count++) { //All VGPR values except the last fprintf(config,"%d=%d,", vgpr_count, wi->vreg[vgpr_count]); } //Last SGPR value fprintf(config,"%d=%d;", vgpr_count, wi->vreg[vgpr_count]); } } //SGPR initial values fprintf(config,"S:"); int sgpr_count = 0; for (sgpr_count = 0; sgpr_count < (kernel->bin_file->enc_dict_entry_southern_islands->num_sgpr_used - 1); sgpr_count++) { //All SGPR values except the last fprintf(config,"%d=%d,", sgpr_count, wavefront->sreg[sgpr_count]); } //Last SGPR value fprintf(config,"%d=%d;", sgpr_count, wavefront->sreg[sgpr_count]); //PC start //fprintf(config,"%d",wavefront->wavefront_pool_start); fprintf(config, "0"); fprintf(config,"\n"); /*MIAOW stop*/ } /*MIAOW start */ fclose(config); char instr_str[100]; sprintf(instr_str, "instr_%d.mem", kernel_config_count); FILE* instr = fopen(instr_str, "w"); //fprintf(instr, "@%.8x\n", kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr); fprintf(instr, "@0\n"); for (int instr_count = 0; instr_count < kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.size; instr_count++) { fprintf(instr, "%.2x\n", ((unsigned char*)kernel->bin_file->enc_dict_entry_southern_islands->sec_text_buffer.ptr)[instr_count]); } fclose(instr); /*MIAOW stop */ /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", kernel->local_size, kernel->local_size3[0], kernel->local_size3[1], kernel->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", kernel->global_size, kernel->global_size3[0], kernel->global_size3[1], kernel->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", kernel->group_count, kernel->group_count3[0], kernel->group_count3[1], kernel->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug(" tid tid2 tid1 tid0 gid gid2 gid1 gid0 lid lid2 lid1 lid0 wavefront work-group\n"); for (tid = 0; tid < ndrange->work_item_count; tid++) { work_item = ndrange->work_items[tid]; wavefront = work_item->wavefront; work_group = work_item->work_group; si_isa_debug("%4d %4d %4d %4d ", work_item->id, work_item->id_3d[2], work_item->id_3d[1], work_item->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_group->id, work_group->id_3d[2], work_group->id_3d[1], work_group->id_3d[0]); si_isa_debug("%4d %4d %4d %4d ", work_item->id_in_work_group, work_item->id_in_work_group_3d[2], work_item->id_in_work_group_3d[1], work_item->id_in_work_group_3d[0]); si_isa_debug("%20s.%-4d ", wavefront->name, work_item->id_in_wavefront); si_isa_debug("%20s.%-4d\n", work_group->name, work_item->id_in_work_group); } }
static void si_ndrange_setup_arrays(struct si_ndrange_t *ndrange) { struct si_work_group_t *work_group; struct si_wavefront_t *wavefront; struct si_work_item_t *work_item; int gidx, gidy, gidz; /* 3D work-group ID iterators */ int lidx, lidy, lidz; /* 3D work-item local ID iterators */ int tid; /* Global ID iterator */ int gid; /* Group ID iterator */ int wid; /* Wavefront ID iterator */ int lid; /* Local ID iterator */ /* Array of work-groups */ ndrange->work_group_count = ndrange->group_count; ndrange->work_group_id_first = 0; ndrange->work_group_id_last = ndrange->work_group_count - 1; ndrange->work_groups = xcalloc(ndrange->work_group_count, sizeof(void *)); for (gid = 0; gid < ndrange->group_count; gid++) { ndrange->work_groups[gid] = si_work_group_create(); work_group = ndrange->work_groups[gid]; } /* Array of wavefronts */ ndrange->wavefronts_per_work_group = (ndrange->local_size + si_emu_wavefront_size - 1) / si_emu_wavefront_size; ndrange->wavefront_count = ndrange->wavefronts_per_work_group * ndrange->work_group_count; ndrange->wavefront_id_first = 0; ndrange->wavefront_id_last = ndrange->wavefront_count - 1; assert(ndrange->wavefronts_per_work_group > 0 && ndrange->wavefront_count > 0); ndrange->wavefronts = xcalloc(ndrange->wavefront_count, sizeof(void *)); ndrange->scalar_work_items = xcalloc(ndrange->wavefront_count, sizeof(void *)); for (wid = 0; wid < ndrange->wavefront_count; wid++) { gid = wid / ndrange->wavefronts_per_work_group; ndrange->wavefronts[wid] = si_wavefront_create(); wavefront = ndrange->wavefronts[wid]; work_group = ndrange->work_groups[gid]; wavefront->id = wid; wavefront->id_in_work_group = wid % ndrange->wavefronts_per_work_group; wavefront->ndrange = ndrange; wavefront->work_group = work_group; DOUBLE_LINKED_LIST_INSERT_TAIL(work_group, running, wavefront); /* Initialize the scalar work item */ ndrange->scalar_work_items[wid] = si_work_item_create(); wavefront->scalar_work_item = ndrange->scalar_work_items[wid]; ndrange->scalar_work_items[wid]->wavefront = wavefront; ndrange->scalar_work_items[wid]->work_group = work_group; ndrange->scalar_work_items[wid]->ndrange = ndrange; } /* Array of work-items */ ndrange->work_item_count = ndrange->global_size; ndrange->work_item_id_first = 0; ndrange->work_item_id_last = ndrange->work_item_count - 1; ndrange->work_items = xcalloc(ndrange->work_item_count, sizeof(void *)); tid = 0; gid = 0; for (gidz = 0; gidz < ndrange->group_count3[2]; gidz++) { for (gidy = 0; gidy < ndrange->group_count3[1]; gidy++) { for (gidx = 0; gidx < ndrange->group_count3[0]; gidx++) { /* Assign work-group ID */ work_group = ndrange->work_groups[gid]; work_group->ndrange = ndrange; work_group->id_3d[0] = gidx; work_group->id_3d[1] = gidy; work_group->id_3d[2] = gidz; work_group->id = gid; si_work_group_set_status(work_group, si_work_group_pending); /* First, last, and number of work-items in work-group */ work_group->work_item_id_first = tid; work_group->work_item_id_last = tid + ndrange->local_size; work_group->work_item_count = ndrange->local_size; work_group->work_items = &ndrange->work_items[tid]; snprintf(work_group->name, sizeof(work_group->name), "work-group[i%d-i%d]", work_group->work_item_id_first, work_group->work_item_id_last); /* First ,last, and number of wavefronts in work-group */ work_group->wavefront_id_first = gid * ndrange->wavefronts_per_work_group; work_group->wavefront_id_last = work_group->wavefront_id_first + ndrange->wavefronts_per_work_group - 1; work_group->wavefront_count = ndrange->wavefronts_per_work_group; work_group->wavefronts = &ndrange->wavefronts[work_group->wavefront_id_first]; /* Iterate through work-items */ lid = 0; for (lidz = 0; lidz < ndrange->local_size3[2]; lidz++) { for (lidy = 0; lidy < ndrange->local_size3[1]; lidy++) { for (lidx = 0; lidx < ndrange->local_size3[0]; lidx++) { /* Wavefront ID */ wid = gid * ndrange->wavefronts_per_work_group + lid / si_emu_wavefront_size; assert(wid < ndrange->wavefront_count); wavefront = ndrange->wavefronts[wid]; /* Create work-item */ ndrange->work_items[tid] = si_work_item_create(); work_item = ndrange->work_items[tid]; work_item->ndrange = ndrange; /* Global IDs */ work_item->id_3d[0] = gidx * ndrange->local_size3[0] + lidx; work_item->id_3d[1] = gidy * ndrange->local_size3[1] + lidy; work_item->id_3d[2] = gidz * ndrange->local_size3[2] + lidz; work_item->id = tid; /* Local IDs */ work_item->id_in_work_group_3d[0] = lidx; work_item->id_in_work_group_3d[1] = lidy; work_item->id_in_work_group_3d[2] = lidz; work_item->id_in_work_group = lid; /* Other */ work_item->id_in_wavefront = work_item->id_in_work_group % si_emu_wavefront_size; work_item->work_group = ndrange->work_groups[gid]; work_item->wavefront = ndrange->wavefronts[wid]; /*MIAOW start*/ work_item->id = work_item->id_in_wavefront; /*MIAOW stop*/ /* First, last, and number of work-items in wavefront */ if (!wavefront->work_item_count) { wavefront->work_item_id_first = tid; wavefront->work_items = &ndrange->work_items[tid]; } wavefront->work_item_count++; wavefront->work_item_id_last = tid; /* Next work-item */ tid++; lid++; } } } /* Next work-group */ gid++; } } } /* Initialize the wavefronts */ for (wid = 0; wid < ndrange->wavefront_count; wid++) { /* Assign names to wavefronts */ wavefront = ndrange->wavefronts[wid]; snprintf(wavefront->name, sizeof(wavefront->name), "wavefront[i%d-i%d]", wavefront->work_item_id_first, wavefront->work_item_id_last); } /* Debug */ si_isa_debug("local_size = %d (%d,%d,%d)\n", ndrange->local_size, ndrange->local_size3[0], ndrange->local_size3[1], ndrange->local_size3[2]); si_isa_debug("global_size = %d (%d,%d,%d)\n", ndrange->global_size, ndrange->global_size3[0], ndrange->global_size3[1], ndrange->global_size3[2]); si_isa_debug("group_count = %d (%d,%d,%d)\n", ndrange->group_count, ndrange->group_count3[0], ndrange->group_count3[1], ndrange->group_count3[2]); si_isa_debug("wavefront_count = %d\n", ndrange->wavefront_count); si_isa_debug("wavefronts_per_work_group = %d\n", ndrange->wavefronts_per_work_group); si_isa_debug("\n"); }
/* Execute one instruction in the wavefront */ void si_wavefront_execute(struct si_wavefront_t *wavefront) { struct si_ndrange_t *ndrange; struct si_work_group_t *work_group; struct si_work_item_t *work_item; struct si_inst_t *inst; char inst_dump[MAX_INST_STR_SIZE]; unsigned int pc; ndrange = wavefront->ndrange; int work_item_id; /* Get current work-group */ ndrange = wavefront->ndrange; work_group = wavefront->work_group; work_item = NULL; inst = NULL; assert(!DOUBLE_LINKED_LIST_MEMBER(work_group, finished, wavefront)); /* Reset instruction flags */ wavefront->vector_mem_write = 0; wavefront->vector_mem_read = 0; wavefront->scalar_mem_read = 0; wavefront->local_mem_write = 0; wavefront->local_mem_read = 0; wavefront->pred_mask_update = 0; wavefront->mem_wait = 0; wavefront->barrier = 0; assert(!wavefront->finished); /* Grab the next instruction and update the pointer */ wavefront->inst_size = si_inst_decode(wavefront->wavefront_pool, &wavefront->inst, 0); /* Stats */ si_emu->inst_count++; wavefront->emu_inst_count++; wavefront->inst_count++; /* Set the current instruction */ inst = &wavefront->inst; pc = wavefront->wavefront_pool - wavefront->wavefront_pool_start; /*MIAOW start - Print the debug message to stdout, stderr to the open file stream*/ si_isa_debug("\n###%d_%d_%d", kernel_config_count - 1, wavefront->work_group->id, wavefront->id_in_work_group, pc, wavefront->inst_size); /*MIAOW stop*/ /* Execute the current instruction */ switch (inst->info->fmt) { /* Scalar ALU Instructions */ case SI_FMT_SOP1: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sop1(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_SOP2: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sop2(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); //Calling a function pointer in machine.c if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_SOPP: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sopp(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ if (wavefront->inst.micro_inst.sopp.op > 1 && wavefront->inst.micro_inst.sopp.op < 10) { si_emu->branch_inst_count++; wavefront->branch_inst_count++; } else { si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; } /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_SOPC: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sopc(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_SOPK: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_sopk(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_alu_inst_count++; wavefront->scalar_alu_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } /* Scalar Memory Instructions */ case SI_FMT_SMRD: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_smrd(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->scalar_mem_inst_count++; wavefront->scalar_mem_inst_count++; /* Only one work item executes the instruction */ work_item = wavefront->scalar_work_item; (*si_isa_inst_func[inst->info->inst])(work_item, inst); if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } /* Vector ALU Instructions */ case SI_FMT_VOP2: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_vop2(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->vector_alu_inst_count++; wavefront->vector_alu_inst_count++; /* Execute the instruction */ SI_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { work_item = ndrange->work_items[work_item_id]; if(si_wavefront_work_item_active(wavefront, work_item->id_in_wavefront)) (*si_isa_inst_func[inst->info->inst])(work_item, inst); } if (debug_status(si_isa_debug_category)) { si_isa_debug("\n"); } break; } case SI_FMT_VOP1: { /* Dump instruction string when debugging */ if (debug_status(si_isa_debug_category)) { si_inst_dump_vop1(inst, wavefront->inst_size, pc, wavefront->wavefront_pool, inst_dump, MAX_INST_STR_SIZE); si_isa_debug("\n%s", inst_dump); } /* Stats */ si_emu->vector_alu_inst_count++; wavefront->vector_alu_inst_count++; if (inst->micro_inst.vop1.op == 2) { /* Instruction ignores execution mask and is only executed on one work item. * Execute on the first active work item from the least significant bit in EXEC. * (if exec is 0, execute work item 0) */ work_item = ndrange->work_items[wavefront->work_item_id_first]; if (si_isa_read_sreg(work_item, SI_EXEC) == 0 && si_isa_read_sreg(work_item, SI_EXEC + 1) == 0) { (*si_isa_inst_func[inst->info->inst])(work_item, inst); } else { SI_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id) { work_item = ndrange->work_items[work_item_id]; if(si_wavefront_work_item_active(wavefront, work_item->id_in_wavefront)) { (*si_isa_inst_func[inst->info->inst])(work_item, inst); break; } } } }