static bool
ilo_cp_detect_hang(struct ilo_cp *cp)
{
   uint32_t active_lost, pending_lost;
   bool guilty = false;

   if (likely(!(ilo_debug & ILO_DEBUG_HANG)))
      return false;

   /* wait and get reset stats */
   if (intel_bo_wait(cp->last_submitted_bo, -1) ||
       intel_winsys_get_reset_stats(cp->winsys, cp->render_ctx,
          &active_lost, &pending_lost))
      return false;

   if (cp->active_lost != active_lost) {
      ilo_err("GPU hang caused by bo %p\n", cp->last_submitted_bo);
      cp->active_lost = active_lost;
      guilty = true;
   }

   if (cp->pending_lost != pending_lost) {
      ilo_err("GPU hang detected\n");
      cp->pending_lost = pending_lost;
   }

   return guilty;
}
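/*
 * A standalone sketch of the counter-delta logic used by
 * ilo_cp_detect_hang() above: the kernel's reset stats are monotonic
 * counters, so a change in active_lost since the last check means one of
 * our own batches was running when the GPU hung, while a change in
 * pending_lost only tells us some context was affected.  The struct and
 * function names here are illustrative, not part of the driver.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct hang_tracker {
   uint32_t active_lost;
   uint32_t pending_lost;
};

static bool
hang_tracker_update(struct hang_tracker *t,
                    uint32_t active_lost, uint32_t pending_lost)
{
   bool guilty = false;

   if (t->active_lost != active_lost) {
      t->active_lost = active_lost;
      guilty = true;   /* our batch was running when the GPU hung */
   }

   if (t->pending_lost != pending_lost)
      t->pending_lost = pending_lost;   /* innocent bystander */

   return guilty;
}

int
main(void)
{
   struct hang_tracker t = { 0, 0 };

   printf("guilty: %d\n", hang_tracker_update(&t, 1, 1)); /* 1 */
   printf("guilty: %d\n", hang_tracker_update(&t, 1, 2)); /* 0 */
   return 0;
}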
/**
 * Translate the TGSI tokens.
 */
static bool
vs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
              struct toy_tgsi *tgsi)
{
   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("dumping vertex shader\n");
      ilo_printf("\n");

      tgsi_dump(tokens, 0);
      ilo_printf("\n");
   }

   toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
   if (tc->fail) {
      ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("TGSI translator:\n");
      toy_tgsi_dump(tgsi);
      ilo_printf("\n");

      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   return true;
}
/**
 * Compile the shader.
 */
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp),
            tsrc_imm_d(gcc->static_data.total_prims));
      gs_COPY1(tc, gcc->vars.urb_write_header, 0,
            tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0,
               tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      tc_fail(tc, "no control flow support");
      return false;
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
static bool
tex_import_handle(struct ilo_texture *tex,
                  const struct winsys_handle *handle)
{
   struct ilo_screen *is = ilo_screen(tex->base.screen);
   const char *name = resource_get_bo_name(&tex->base);
   enum intel_tiling_mode tiling;
   unsigned long pitch;

   tex->bo = intel_winsys_import_handle(is->winsys, name, handle,
         tex->layout.bo_height, &tiling, &pitch);
   if (!tex->bo)
      return false;

   if (!ilo_layout_update_for_imported_bo(&tex->layout, tiling, pitch)) {
      ilo_err("imported handle has incompatible tiling/pitch\n");
      intel_bo_unreference(tex->bo);
      tex->bo = NULL;
      return false;
   }

   return true;
}
static bool
gs_compile_passthrough(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   gcc->is_static = true;
   gcc->static_data.total_vertices = gcc->in_vue_count;
   gcc->static_data.total_prims = 1;
   gcc->static_data.last_vertex[0] = 1 << (gcc->in_vue_count - 1);

   gs_init_vars(gcc);
   gs_ff_sync(gcc, tdst_d(gcc->vars.tmp),
         tsrc_imm_d(gcc->static_data.total_prims));
   gs_COPY1(tc, gcc->vars.urb_write_header, 0,
         tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   if (gcc->write_so)
      gs_COPY4(tc, gcc->vars.so_index, 0,
            tsrc_from(tdst_d(gcc->vars.tmp)), 1);

   {
      int vert, attr;

      for (vert = 0; vert < gcc->out_vue_min_count; vert++) {
         for (attr = 0; attr < gcc->shader->out.count; attr++) {
            tc_MOV(tc, tdst_from(gcc->vars.tgsi_outs[attr]),
                  tsrc_offset(gcc->payload.vues[vert], attr / 2,
                     (attr % 2) * 4));
         }

         gs_lower_opcode_emit(gcc, NULL);
      }

      gs_lower_opcode_endprim(gcc, NULL);
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);

   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      int i;

      ilo_printf("VUE count %d, VUE size %d\n",
            gcc->in_vue_count, gcc->in_vue_size);
      ilo_printf("%srasterizer discard\n",
            (gcc->variant->u.gs.rasterizer_discard) ? "" : "no ");

      for (i = 0; i < gcc->so_info->num_outputs; i++) {
         ilo_printf("SO[%d] = OUT[%d]\n", i,
               gcc->so_info->output[i].register_index);
      }

      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
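/*
 * A standalone sketch of the VUE attribute addressing used in the
 * passthrough loop above: each 256-bit GRF holds two 4-component
 * attributes, so attribute i lives at register offset i / 2 and channel
 * offset (i % 2) * 4 within the vertex's payload.  The helper name is
 * illustrative, not part of the driver.
 */
#include <stdio.h>

static void
vue_attr_location(int attr, int *reg_offset, int *channel_offset)
{
   *reg_offset = attr / 2;           /* two attributes per register */
   *channel_offset = (attr % 2) * 4; /* second attribute starts at channel 4 */
}

int
main(void)
{
   int attr;

   for (attr = 0; attr < 4; attr++) {
      int reg, ch;

      vue_attr_location(attr, &reg, &ch);
      printf("attr %d -> reg +%d, channel %d\n", attr, reg, ch);
   }
   return 0;
}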
/**
 * Compile the shader.
 */
static bool
vs_compile(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct ilo_shader *sh = vcc->shader;

   vs_lower_virtual_opcodes(vcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         vcc->first_free_grf,
         vcc->last_free_grf,
         vcc->num_grf_per_vrf);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize VS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   if (true) {
      sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   }
   else {
      /* debug path: substitute hand-assembled microcode for the kernel */
      static const uint32_t microcode[] = {
         /* fill in the microcode here */
         0x0, 0x0, 0x0, 0x0,
      };
      const bool swap = true;

      sh->kernel_size = sizeof(microcode);
      sh->kernel = MALLOC(sh->kernel_size);

      if (sh->kernel) {
         const int num_dwords = sizeof(microcode) / 4;
         const uint32_t *src = microcode;
         uint32_t *dst = (uint32_t *) sh->kernel;
         int i;

         /* copy one 128-bit instruction (four DWORDs) at a time,
          * optionally reversing the DWORD order within it */
         for (i = 0; i < num_dwords; i += 4) {
            if (swap) {
               dst[i + 0] = src[i + 3];
               dst[i + 1] = src[i + 2];
               dst[i + 2] = src[i + 1];
               dst[i + 3] = src[i + 0];
            }
            else {
               memcpy(&dst[i], &src[i], 16);
            }
         }
      }
   }

   if (!sh->kernel) {
      ilo_err("failed to compile VS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
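/*
 * A standalone sketch of the DWORD-swapping copy in the debug path of
 * vs_compile() above: each 128-bit instruction (four DWORDs) is copied
 * with its DWORD order reversed.  The function name and test values are
 * arbitrary and only illustrate the swap.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
copy_microcode(uint32_t *dst, const uint32_t *src, int num_dwords, int swap)
{
   int i;

   for (i = 0; i < num_dwords; i += 4) {
      if (swap) {
         /* reverse the four DWORDs of this instruction */
         dst[i + 0] = src[i + 3];
         dst[i + 1] = src[i + 2];
         dst[i + 2] = src[i + 1];
         dst[i + 3] = src[i + 0];
      }
      else {
         memcpy(&dst[i], &src[i], 16);
      }
   }
}

int
main(void)
{
   const uint32_t src[4] = { 0x11, 0x22, 0x33, 0x44 };
   uint32_t dst[4];

   copy_microcode(dst, src, 4, 1);
   printf("%" PRIx32 " %" PRIx32 " %" PRIx32 " %" PRIx32 "\n",
          dst[0], dst[1], dst[2], dst[3]); /* 44 33 22 11 */
   return 0;
}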
static bool
init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
{
   dev->devid = info->devid;
   dev->has_llc = info->has_llc;
   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
   dev->has_address_swizzling = info->has_address_swizzling;

   /*
    * From the Sandy Bridge PRM, volume 4 part 2, page 18:
    *
    *     "[DevSNB]: The GT1 product's URB provides 32KB of storage,
    *      arranged as 1024 256-bit rows. The GT2 product's URB provides
    *      64KB of storage, arranged as 2048 256-bit rows. A row
    *      corresponds in size to an EU GRF register. Read/write access
    *      to the URB is generally supported on a row-granular basis."
    *
    * From the Ivy Bridge PRM, volume 4 part 2, page 17:
    *
    *     "URB Size    URB Rows    URB Rows when SLM Enabled
    *      128k        4096        2048
    *      256k        8096        4096"
    */
   if (IS_HASWELL(info->devid)) {
      dev->gen = ILO_GEN(7.5);

      if (IS_HSW_GT3(info->devid)) {
         dev->gt = 3;
         dev->urb_size = 512 * 1024;
      }
      else if (IS_HSW_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (IS_GEN7(info->devid)) {
      dev->gen = ILO_GEN(7);

      if (IS_IVB_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (IS_GEN6(info->devid)) {
      dev->gen = ILO_GEN(6);

      if (IS_SNB_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 64 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 32 * 1024;
      }
   }
   else {
      ilo_err("unknown GPU generation\n");
      return false;
   }

   return true;
}
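/*
 * A quick sanity check of the URB sizes above against the PRM quotes: a
 * URB row is 256 bits (32 bytes), so the row count is urb_size / 32.
 * Note that 256KB works out to 8192 rows, so the "8096" in the quoted
 * Ivy Bridge table is presumably a typo in the PRM.  This standalone
 * helper is illustrative only.
 */
#include <stdio.h>

static int
urb_rows(int urb_size)
{
   return urb_size / 32;   /* one 256-bit row is 32 bytes */
}

int
main(void)
{
   printf("SNB GT1: %d rows\n", urb_rows(32 * 1024));   /* 1024 */
   printf("SNB GT2: %d rows\n", urb_rows(64 * 1024));   /* 2048 */
   printf("IVB 128k: %d rows\n", urb_rows(128 * 1024)); /* 4096 */
   printf("IVB 256k: %d rows\n", urb_rows(256 * 1024)); /* 8192 */
   return 0;
}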
/**
 * Initialize the \p dev from \p winsys.
 */
bool
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys)
{
   const struct intel_winsys_info *info;

   assert(ilo_is_zeroed(dev, sizeof(*dev)));

   info = intel_winsys_get_info(winsys);

   dev->winsys = winsys;
   dev->devid = info->devid;
   dev->aperture_total = info->aperture_total;
   dev->aperture_mappable = info->aperture_mappable;
   dev->has_llc = info->has_llc;
   dev->has_address_swizzling = info->has_address_swizzling;
   dev->has_logical_context = info->has_logical_context;
   dev->has_ppgtt = info->has_ppgtt;
   dev->has_timestamp = info->has_timestamp;
   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;

   if (!dev->has_logical_context) {
      ilo_err("missing hardware logical context support\n");
      return false;
   }

   /*
    * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT
    * writes on GEN6.
    *
    * From the Sandy Bridge PRM, volume 1 part 3, page 101:
    *
    *     "[DevSNB] When Per-Process GTT Enable is set, it is assumed that
    *      all code is in a secure environment, independent of address
    *      space.  Under this condition, this bit only specifies the
    *      address space (GGTT or PPGTT).  All commands are executed
    *      "as-is""
    *
    * We need PPGTT to be enabled on GEN6 too.
    */
   if (!dev->has_ppgtt) {
      /* experiments show that it does not really matter... */
      ilo_warn("PPGTT disabled\n");
   }

   if (gen_is_bdw(info->devid) || gen_is_chv(info->devid)) {
      dev->gen_opaque = ILO_GEN(8);
      dev->gt = (gen_is_bdw(info->devid)) ?
         gen_get_bdw_gt(info->devid) : 1;

      /* XXX random values */
      if (dev->gt == 3) {
         dev->eu_count = 48;
         dev->thread_count = 336;
         dev->urb_size = 384 * 1024;
      }
      else if (dev->gt == 2) {
         dev->eu_count = 24;
         dev->thread_count = 168;
         dev->urb_size = 384 * 1024;
      }
      else {
         dev->eu_count = 12;
         dev->thread_count = 84;
         dev->urb_size = 192 * 1024;
      }
   }
   else if (gen_is_hsw(info->devid)) {
      /*
       * From the Haswell PRM, volume 4, page 8:
       *
       *     "Description                    GT3    GT2    GT1.5  GT1
       *      (...)
       *      EUs (Total)                    40     20     12     10
       *      Threads (Total)                280    140    84     70
       *      (...)
       *      URB Size (max, within L3$)     512KB  256KB  256KB  128KB"
       */
      dev->gen_opaque = ILO_GEN(7.5);
      dev->gt = gen_get_hsw_gt(info->devid);

      if (dev->gt == 3) {
         dev->eu_count = 40;
         dev->thread_count = 280;
         dev->urb_size = 512 * 1024;
      }
      else if (dev->gt == 2) {
         dev->eu_count = 20;
         dev->thread_count = 140;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->eu_count = 10;
         dev->thread_count = 70;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) {
      /*
       * From the Ivy Bridge PRM, volume 1 part 1, page 18:
       *
       *     "Device             # of EUs    #Threads/EU
       *      Ivy Bridge (GT2)   16          8
       *      Ivy Bridge (GT1)   6           6"
       *
       * From the Ivy Bridge PRM, volume 4 part 2, page 17:
       *
       *     "URB Size    URB Rows    URB Rows when SLM Enabled
       *      128k        4096        2048
       *      256k        8096        4096"
       */
      dev->gen_opaque = ILO_GEN(7);
      dev->gt = (gen_is_ivb(info->devid)) ?
         gen_get_ivb_gt(info->devid) : 1;

      if (dev->gt == 2) {
         dev->eu_count = 16;
         dev->thread_count = 128;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->eu_count = 6;
         dev->thread_count = 36;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (gen_is_snb(info->devid)) {
      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 22:
       *
       *     "Device     # of EUs    #Threads/EU
       *      SNB GT2    12          5
       *      SNB GT1    6           4"
       *
       * From the Sandy Bridge PRM, volume 4 part 2, page 18:
       *
       *     "[DevSNB]: The GT1 product's URB provides 32KB of storage,
       *      arranged as 1024 256-bit rows. The GT2 product's URB provides
       *      64KB of storage, arranged as 2048 256-bit rows. A row
       *      corresponds in size to an EU GRF register. Read/write access
       *      to the URB is generally supported on a row-granular basis."
       */
      dev->gen_opaque = ILO_GEN(6);
      dev->gt = gen_get_snb_gt(info->devid);

      if (dev->gt == 2) {
         dev->eu_count = 12;
         dev->thread_count = 60;
         dev->urb_size = 64 * 1024;
      }
      else {
         dev->eu_count = 6;
         dev->thread_count = 24;
         dev->urb_size = 32 * 1024;
      }
   }
   else {
      ilo_err("unknown GPU generation\n");
      return false;
   }

   return true;
}
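/*
 * A standalone cross-check of the thread counts set above against the
 * PRM quotes: total threads = EUs * threads per EU (SNB GT2: 12 * 5 = 60,
 * SNB GT1: 6 * 4 = 24, IVB GT2: 16 * 8 = 128).  The helper name is
 * illustrative only.
 */
#include <stdio.h>

static int
total_threads(int eu_count, int threads_per_eu)
{
   return eu_count * threads_per_eu;
}

int
main(void)
{
   printf("SNB GT2: %d threads\n", total_threads(12, 5)); /* 60 */
   printf("SNB GT1: %d threads\n", total_threads(6, 4));  /* 24 */
   printf("IVB GT2: %d threads\n", total_threads(16, 8)); /* 128 */
   return 0;
}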