Exemple #1
0
/**
 * Check whether the most recently submitted bo is responsible for a GPU hang.
 *
 * The check is skipped entirely (returning false) unless ILO_DEBUG_HANG is
 * set in ilo_debug.  Otherwise the function waits on cp->last_submitted_bo,
 * queries the reset statistics for cp->render_ctx, and compares them against
 * the counters cached in \p cp, updating the cached values when they differ.
 *
 * \return true only when the active-lost counter changed; false when the
 *         check is disabled, when waiting or querying fails, or when only
 *         the pending-lost counter changed.
 */
static bool
ilo_cp_detect_hang(struct ilo_cp *cp)
{
   uint32_t cur_active, cur_pending;
   bool culprit;

   if (likely(!(ilo_debug & ILO_DEBUG_HANG)))
      return false;

   /* wait for the bo; a non-zero result leaves us with nothing to report */
   if (intel_bo_wait(cp->last_submitted_bo, -1))
      return false;

   /* fetch the current reset stats; again non-zero means give up */
   if (intel_winsys_get_reset_stats(cp->winsys, cp->render_ctx,
                                    &cur_active, &cur_pending))
      return false;

   /* a change in the active-lost counter is attributed to our bo */
   culprit = (cp->active_lost != cur_active);
   if (culprit) {
      ilo_err("GPU hang caused by bo %p\n", cp->last_submitted_bo);
      cp->active_lost = cur_active;
   }

   /* a change in the pending-lost counter is reported but not blamed on us */
   if (cp->pending_lost != cur_pending) {
      ilo_err("GPU hang detected\n");
      cp->pending_lost = cur_pending;
   }

   return culprit;
}
/**
 * Translate the TGSI tokens of a vertex shader.
 *
 * When ILO_DEBUG_VS is set in ilo_debug, the incoming tokens are dumped
 * before translation, and the translator state and compiler contents are
 * dumped after a successful translation.
 *
 * \param tc      compiler that receives the translated instructions
 * \param tokens  TGSI tokens to translate
 * \param tgsi    translator state filled in by the translation
 * \return false (after logging tc->reason) when the compiler is flagged as
 *         failed, true otherwise.
 */
static bool
vs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
              struct toy_tgsi *tgsi)
{
   const bool verbose = (ilo_debug & ILO_DEBUG_VS) != 0;

   if (verbose) {
      ilo_printf("dumping vertex shader\n");
      ilo_printf("\n");

      tgsi_dump(tokens, 0);
      ilo_printf("\n");
   }

   toy_compiler_translate_tgsi(tc, tokens, true, tgsi);

   if (!tc->fail) {
      if (verbose) {
         ilo_printf("TGSI translator:\n");
         toy_tgsi_dump(tgsi);
         ilo_printf("\n");
         toy_compiler_dump(tc);
         ilo_printf("\n");
      }

      return true;
   }

   ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason);
   return false;
}
Exemple #3
0
/**
 * Compile the shader.
 *
 * Only the static case is handled: after get_num_prims_static(), if
 * gcc->is_static is not set the compiler is failed with "no control flow
 * support" and the function returns false.  In the static case the prologue
 * (FF_SYNC, URB write header and, when gcc->write_so is set, the SO index)
 * is emitted at the head of the instruction list before the usual
 * lower / legalize / optimize / register-allocate / assemble sequence runs.
 *
 * \return true when sh->kernel has been assembled; false on any failure.
 */
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      /* the prologue goes in front of the already-translated instructions */
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
      gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      tc_fail(tc, "no control flow support");
      return false;
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);
   toy_compiler_legalize_for_asm(tc);

   /* any of the passes above may have flagged the compiler as failed */
   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      /* report assembly failures the same way the other compile paths do */
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
Exemple #4
0
/**
 * Back \p tex with a bo imported from a winsys handle.
 *
 * The bo is imported through the screen's winsys using the texture layout's
 * bo_height; the tiling and pitch reported by the import are then applied to
 * the layout.  If the layout rejects them, the bo reference is dropped and
 * tex->bo is reset to NULL.
 *
 * \return true when tex->bo holds a compatible imported bo, false otherwise.
 */
static bool
tex_import_handle(struct ilo_texture *tex,
                  const struct winsys_handle *handle)
{
   struct ilo_screen *is = ilo_screen(tex->base.screen);
   const char *name = resource_get_bo_name(&tex->base);
   enum intel_tiling_mode tiling;
   unsigned long pitch;
   bool imported = false;

   tex->bo = intel_winsys_import_handle(is->winsys, name, handle,
         tex->layout.bo_height, &tiling, &pitch);

   if (tex->bo) {
      if (ilo_layout_update_for_imported_bo(&tex->layout, tiling, pitch)) {
         imported = true;
      }
      else {
         /* the layout cannot describe this bo; give it back */
         ilo_err("imported handle has incompatible tiling/pitch\n");
         intel_bo_unreference(tex->bo);
         tex->bo = NULL;
      }
   }

   return imported;
}
Exemple #5
0
/**
 * Compile a pass-through geometry shader.
 *
 * No TGSI is involved here: the instruction stream is generated directly.
 * The shader is marked static with a single primitive made of
 * gcc->in_vue_count vertices, then for each of the first
 * gcc->out_vue_min_count input VUEs every output attribute is copied into
 * the TGSI output variables and the vertex is emitted; the primitive is
 * ended once all vertices have been emitted.  The result then goes through
 * the usual lower / legalize / optimize / register-allocate / assemble
 * sequence.
 *
 * \return true when sh->kernel has been assembled; false on any failure.
 */
static bool
gs_compile_passthrough(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   gcc->is_static = true;
   gcc->static_data.total_vertices = gcc->in_vue_count;
   gcc->static_data.total_prims = 1;
   /*
    * Sets the bit of the final input vertex.
    * NOTE(review): assumes in_vue_count >= 1; the shift count would be
    * negative otherwise -- confirm against callers.
    */
   gcc->static_data.last_vertex[0] = 1 << (gcc->in_vue_count - 1);

   gs_init_vars(gcc);
   gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
   gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   if (gcc->write_so)
      gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

   {
      int vert, attr;

      for (vert = 0; vert < gcc->out_vue_min_count; vert++) {
         for (attr = 0; attr < gcc->shader->out.count; attr++) {
            /* two attributes per VUE row: row attr / 2, channel offset 0 or 4 */
            tc_MOV(tc, tdst_from(gcc->vars.tgsi_outs[attr]),
                  tsrc_offset(gcc->payload.vues[vert], attr / 2, (attr % 2) * 4));
         }

         gs_lower_opcode_emit(gcc, NULL);
      }

      gs_lower_opcode_endprim(gcc, NULL);
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);

   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   /*
    * A failure at this point comes from the generation/legalization passes
    * above, not from TGSI translation, so say so in the message.
    */
   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      int i;

      ilo_printf("VUE count %d, VUE size %d\n",
            gcc->in_vue_count, gcc->in_vue_size);
      ilo_printf("%srasterizer discard\n",
            (gcc->variant->u.gs.rasterizer_discard) ? "" : "no ");

      for (i = 0; i < gcc->so_info->num_outputs; i++) {
         ilo_printf("SO[%d] = OUT[%d]\n", i,
               gcc->so_info->output[i].register_index);
      }

      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
/**
 * Compile the shader.
 *
 * Lowers the virtual opcodes, legalizes and optimizes the instructions,
 * allocates registers between vcc->first_free_grf and vcc->last_free_grf,
 * and assembles the result into sh->kernel / sh->kernel_size.
 *
 * \return true when sh->kernel is available; false (after logging
 *         tc->reason) when legalization failed or no kernel was produced.
 */
static bool
vs_compile(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct ilo_shader *sh = vcc->shader;

   vs_lower_virtual_opcodes(vcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         vcc->first_free_grf,
         vcc->last_free_grf,
         vcc->num_grf_per_vrf);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize VS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   /*
    * The else branch is compiled but never taken; it presumably exists so a
    * developer can flip the condition and load hand-written microcode.
    */
   if (true) {
      sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   }
   else {
      static const uint32_t microcode[] = {
         /* fill in the microcode here */
         0x0, 0x0, 0x0, 0x0,
      };
      const bool swap = true;

      sh->kernel_size = sizeof(microcode);
      sh->kernel = MALLOC(sh->kernel_size);

      if (sh->kernel) {
         const int num_dwords = sizeof(microcode) / 4;
         const uint32_t *src = microcode;
         uint32_t *dst = (uint32_t *) sh->kernel;
         int i;

         /*
          * Copy one 4-dword group per iteration, optionally reversing the
          * dword order within the group.  The table length is expected to
          * be a multiple of 4 dwords.
          */
         for (i = 0; i < num_dwords; i += 4) {
            if (swap) {
               dst[i + 0] = src[i + 3];
               dst[i + 1] = src[i + 2];
               dst[i + 2] = src[i + 1];
               dst[i + 3] = src[i + 0];
            }
            else {
               /* copy the current group, not the first one over and over */
               memcpy(dst + i, src + i, 16);
            }
         }
      }
   }

   if (!sh->kernel) {
      ilo_err("failed to compile VS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
Exemple #7
0
/**
 * Fill \p dev from the winsys-reported \p info.
 *
 * The device id and capability flags are copied verbatim; the generation,
 * GT level and URB size are derived from the device id.
 *
 * \return false (after logging an error) when the device id belongs to no
 *         known generation; in that case gen, gt and urb_size are left
 *         untouched.  true otherwise.
 */
static bool
init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
{
   int gt_level = 1;
   int urb_kb;

   dev->devid = info->devid;
   dev->has_llc = info->has_llc;
   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
   dev->has_address_swizzling = info->has_address_swizzling;

   /*
    * From the Sandy Bridge PRM, volume 4 part 2, page 18:
    *
    *     "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged
    *      as 1024 256-bit rows. The GT2 product's URB provides 64KB of
    *      storage, arranged as 2048 256-bit rows. A row corresponds in size
    *      to an EU GRF register. Read/write access to the URB is generally
    *      supported on a row-granular basis."
    *
    * From the Ivy Bridge PRM, volume 4 part 2, page 17:
    *
    *     "URB Size    URB Rows    URB Rows when SLM Enabled
    *      128k        4096        2048
    *      256k        8096        4096"
    */

   /*
    * The Haswell test comes before the generic GEN7 one (presumably because
    * the latter would match Haswell ids as well -- confirm).
    */
   if (IS_HASWELL(info->devid)) {
      dev->gen = ILO_GEN(7.5);

      if (IS_HSW_GT3(info->devid)) {
         gt_level = 3;
         urb_kb = 512;
      }
      else if (IS_HSW_GT2(info->devid)) {
         gt_level = 2;
         urb_kb = 256;
      }
      else {
         urb_kb = 128;
      }
   }
   else if (IS_GEN7(info->devid)) {
      dev->gen = ILO_GEN(7);

      if (IS_IVB_GT2(info->devid)) {
         gt_level = 2;
         urb_kb = 256;
      }
      else {
         urb_kb = 128;
      }
   }
   else if (IS_GEN6(info->devid)) {
      dev->gen = ILO_GEN(6);

      if (IS_SNB_GT2(info->devid)) {
         gt_level = 2;
         urb_kb = 64;
      }
      else {
         urb_kb = 32;
      }
   }
   else {
      ilo_err("unknown GPU generation\n");
      return false;
   }

   /* commit the derived values only once the generation is known */
   dev->gt = gt_level;
   dev->urb_size = urb_kb * 1024;

   return true;
}
Exemple #8
0
/**
 * Initialize the \p dev from \p winsys.
 *
 * \p dev must be zeroed on entry (asserted).  The winsys info is copied into
 * \p dev, then the generation, GT level, EU count, thread count and URB size
 * are derived from the device id.
 *
 * \return false (after logging an error) when hardware logical contexts are
 *         not available or the device id belongs to no known generation;
 *         true otherwise.
 */
bool
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys)
{
   const struct intel_winsys_info *info;

   assert(ilo_is_zeroed(dev, sizeof(*dev)));

   info = intel_winsys_get_info(winsys);

   dev->winsys = winsys;
   dev->devid = info->devid;

   dev->aperture_total = info->aperture_total;
   dev->aperture_mappable = info->aperture_mappable;

   dev->has_llc = info->has_llc;
   dev->has_address_swizzling = info->has_address_swizzling;
   dev->has_logical_context = info->has_logical_context;
   dev->has_ppgtt = info->has_ppgtt;
   dev->has_timestamp = info->has_timestamp;
   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;

   /* logical contexts are a hard requirement */
   if (!dev->has_logical_context) {
      ilo_err("missing hardware logical context support\n");
      return false;
   }

   /*
    * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT
    * writes on GEN6.
    *
    * From the Sandy Bridge PRM, volume 1 part 3, page 101:
    *
    *     "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all
    *      code is in a secure environment, independent of address space.
    *      Under this condition, this bit only specifies the address space
    *      (GGTT or PPGTT). All commands are executed "as-is""
    *
    * We need PPGTT to be enabled on GEN6 too.
    */
   if (!dev->has_ppgtt) {
      /* experiments show that it does not really matter... */
      ilo_warn("PPGTT disabled\n");
   }

   if (gen_is_bdw(info->devid) || gen_is_chv(info->devid)) {
      dev->gen_opaque = ILO_GEN(8);

      /* only BDW has a GT lookup; anything else landing here is GT1 */
      if (gen_is_bdw(info->devid))
         dev->gt = gen_get_bdw_gt(info->devid);
      else
         dev->gt = 1;

      /* XXX random values */
      switch (dev->gt) {
      case 3:
         dev->eu_count = 48;
         dev->thread_count = 336;
         dev->urb_size = 384 * 1024;
         break;
      case 2:
         dev->eu_count = 24;
         dev->thread_count = 168;
         dev->urb_size = 384 * 1024;
         break;
      default:
         dev->eu_count = 12;
         dev->thread_count = 84;
         dev->urb_size = 192 * 1024;
         break;
      }
   } else if (gen_is_hsw(info->devid)) {
      /*
       * From the Haswell PRM, volume 4, page 8:
       *
       *     "Description                    GT3      GT2      GT1.5    GT1
       *      (...)
       *      EUs (Total)                    40       20       12       10
       *      Threads (Total)                280      140      84       70
       *      (...)
       *      URB Size (max, within L3$)     512KB    256KB    256KB    128KB"
       */
      dev->gen_opaque = ILO_GEN(7.5);
      dev->gt = gen_get_hsw_gt(info->devid);

      switch (dev->gt) {
      case 3:
         dev->eu_count = 40;
         dev->thread_count = 280;
         dev->urb_size = 512 * 1024;
         break;
      case 2:
         dev->eu_count = 20;
         dev->thread_count = 140;
         dev->urb_size = 256 * 1024;
         break;
      default:
         dev->eu_count = 10;
         dev->thread_count = 70;
         dev->urb_size = 128 * 1024;
         break;
      }
   } else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) {
      /*
       * From the Ivy Bridge PRM, volume 1 part 1, page 18:
       *
       *     "Device             # of EUs        #Threads/EU
       *      Ivy Bridge (GT2)   16              8
       *      Ivy Bridge (GT1)   6               6"
       *
       * From the Ivy Bridge PRM, volume 4 part 2, page 17:
       *
       *     "URB Size    URB Rows    URB Rows when SLM Enabled
       *      128k        4096        2048
       *      256k        8096        4096"
       */
      dev->gen_opaque = ILO_GEN(7);

      /* only IVB has a GT lookup; anything else landing here is GT1 */
      if (gen_is_ivb(info->devid))
         dev->gt = gen_get_ivb_gt(info->devid);
      else
         dev->gt = 1;

      switch (dev->gt) {
      case 2:
         dev->eu_count = 16;
         dev->thread_count = 128;
         dev->urb_size = 256 * 1024;
         break;
      default:
         dev->eu_count = 6;
         dev->thread_count = 36;
         dev->urb_size = 128 * 1024;
         break;
      }
   } else if (gen_is_snb(info->devid)) {
      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 22:
       *
       *     "Device             # of EUs        #Threads/EU
       *      SNB GT2            12              5
       *      SNB GT1            6               4"
       *
       * From the Sandy Bridge PRM, volume 4 part 2, page 18:
       *
       *     "[DevSNB]: The GT1 product's URB provides 32KB of storage,
       *      arranged as 1024 256-bit rows. The GT2 product's URB provides
       *      64KB of storage, arranged as 2048 256-bit rows. A row
       *      corresponds in size to an EU GRF register. Read/write access to
       *      the URB is generally supported on a row-granular basis."
       */
      dev->gen_opaque = ILO_GEN(6);
      dev->gt = gen_get_snb_gt(info->devid);

      switch (dev->gt) {
      case 2:
         dev->eu_count = 12;
         dev->thread_count = 60;
         dev->urb_size = 64 * 1024;
         break;
      default:
         dev->eu_count = 6;
         dev->thread_count = 24;
         dev->urb_size = 32 * 1024;
         break;
      }
   } else {
      ilo_err("unknown GPU generation\n");
      return false;
   }

   return true;
}