Exemple #1
0
static int
nouveau_codegen(int chipset, int type, struct tgsi_token tokens[],
                unsigned *size, unsigned **code) {
   struct nv50_ir_prog_info info = {0};
   int ret;

   info.type = type;
   info.target = chipset;
   info.bin.sourceRep = NV50_PROGRAM_IR_TGSI;
   info.bin.source = tokens;

   info.io.ucpCBSlot = 15;
   info.io.ucpBase = NV50_CB_AUX_UCP_OFFSET;

   info.io.resInfoCBSlot = 15;
   info.io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
   info.io.msInfoCBSlot = 15;
   info.io.msInfoBase = NV50_CB_AUX_MS_OFFSET;

   info.assignSlots = dummy_assign_slots;

   info.optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   info.dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);

   ret = nv50_ir_generate_code(&info);
   if (ret) {
      _debug_printf("Error compiling program: %d\n", ret);
      return ret;
   }

   *size = info.bin.codeSize;
   *code = info.bin.code;
   return 0;
}
Exemple #2
0
/**
 * Create a new pipe_screen object
 * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
 */
struct pipe_screen *
llvmpipe_create_screen(struct sw_winsys *winsys)
{
   struct llvmpipe_screen *screen;

   util_cpu_detect();

#ifdef DEBUG
   LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
#endif

   LP_PERF = debug_get_flags_option("LP_PERF", lp_perf_flags, 0 );

   screen = CALLOC_STRUCT(llvmpipe_screen);
   if (!screen)
      return NULL;

   if (!lp_jit_screen_init(screen)) {
      FREE(screen);
      return NULL;
   }

   screen->winsys = winsys;

   screen->base.destroy = llvmpipe_destroy_screen;

   screen->base.get_name = llvmpipe_get_name;
   screen->base.get_vendor = llvmpipe_get_vendor;
   screen->base.get_device_vendor = llvmpipe_get_vendor; // TODO should be the CPU vendor
   screen->base.get_param = llvmpipe_get_param;
   screen->base.get_shader_param = llvmpipe_get_shader_param;
   screen->base.get_paramf = llvmpipe_get_paramf;
   screen->base.is_format_supported = llvmpipe_is_format_supported;

   screen->base.context_create = llvmpipe_create_context;
   screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
   screen->base.fence_reference = llvmpipe_fence_reference;
   screen->base.fence_finish = llvmpipe_fence_finish;

   screen->base.get_timestamp = llvmpipe_get_timestamp;

   llvmpipe_init_screen_resource_funcs(&screen->base);

   screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
#ifdef PIPE_SUBSYSTEM_EMBEDDED
   screen->num_threads = 0;
#endif
   screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
   screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);

   screen->rast = lp_rast_create(screen->num_threads);
   if (!screen->rast) {
      lp_jit_screen_cleanup(screen);
      FREE(screen);
      return NULL;
   }
   (void) mtx_init(&screen->rast_mutex, mtx_plain);

   return &screen->base;
}
Exemple #3
0
VdpStatus
vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
                              VdpOutputSurface surface,
                              uint32_t clip_width,
                              uint32_t clip_height,
                              VdpTime  earliest_presentation_time)
{
   static int dump_window = -1;

   vlVdpPresentationQueue *pq;
   vlVdpOutputSurface *surf;
   struct pipe_surface *drawable_surface;

   pq = vlGetDataHTAB(presentation_queue);
   if (!pq)
      return VDP_STATUS_INVALID_HANDLE;

   drawable_surface = vl_drawable_surface_get(pq->device->context, pq->drawable);
   if (!drawable_surface)
      return VDP_STATUS_INVALID_HANDLE;

   surf = vlGetDataHTAB(surface);
   if (!surf)
      return VDP_STATUS_INVALID_HANDLE;

   vl_compositor_clear_layers(&pq->compositor);
   vl_compositor_set_rgba_layer(&pq->compositor, 0, surf->sampler_view, NULL, NULL);
   vl_compositor_render(&pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
                        drawable_surface, NULL, NULL);

   pq->device->context->pipe->screen->flush_frontbuffer
   (
      pq->device->context->pipe->screen,
      drawable_surface->texture,
      0, 0,
      vl_contextprivate_get(pq->device->context, drawable_surface)
   );

   if(dump_window == -1) {
      dump_window = debug_get_num_option("VDPAU_DUMP", 0);
   }

   if(dump_window) {
      static unsigned int framenum = 0;
      char cmd[256];

      sprintf(cmd, "xwd -id %d -out vdpau_frame_%08d.xwd", (int)pq->drawable, ++framenum);
      if (system(cmd) != 0)
         VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Dumping surface %d failed.\n", surface);
   }
   
   pipe_surface_reference(&drawable_surface, NULL);

   return VDP_STATUS_OK;
}
Exemple #4
0
void
lp_build_init(void)
{
   if (gallivm_initialized)
      return;

#ifdef DEBUG
   gallivm_debug = debug_get_option_gallivm_debug();
#endif

   lp_set_target_options();

#if USE_MCJIT
   LLVMLinkInMCJIT();
#else
   LLVMLinkInJIT();
#endif

   util_cpu_detect();

   /* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
    * 8-wide vector needs more floating ops than 4-wide (due to padding), it is
    * actually more efficient to use 4-wide vectors on this processor.
    *
    * See also:
    * - http://www.anandtech.com/show/4955/the-bulldozer-review-amd-fx8150-tested/2
    */
   if (HAVE_AVX &&
       util_cpu_caps.has_avx &&
       util_cpu_caps.has_intel) {
      lp_native_vector_width = 256;
   } else {
      /* Leave it at 128, even when no SIMD extensions are available.
       * Really needs to be a multiple of 128 so can fit 4 floats.
       */
      lp_native_vector_width = 128;
   }
 
   lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
                                                 lp_native_vector_width);

   gallivm_initialized = TRUE;

#if 0
   /* For simulating less capable machines */
   util_cpu_caps.has_sse3 = 0;
   util_cpu_caps.has_ssse3 = 0;
   util_cpu_caps.has_sse4_1 = 0;
#endif
}
void
lp_build_init(void)
{
   if (gallivm_initialized)
      return;

#ifdef DEBUG
   gallivm_debug = debug_get_option_gallivm_debug();
#endif

   lp_set_target_options();

#if USE_MCJIT
   LLVMLinkInMCJIT();
#else
   LLVMLinkInJIT();
#endif

   util_cpu_detect();

   if (HAVE_AVX &&
       util_cpu_caps.has_avx) {
      lp_native_vector_width = 256;
   } else {
      /* Leave it at 128, even when no SIMD extensions are available.
       * Really needs to be a multiple of 128 so can fit 4 floats.
       */
      lp_native_vector_width = 128;
   }
 
   lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
                                                 lp_native_vector_width);

   gallivm_initialized = TRUE;

#if 0
   /* For simulating less capable machines */
   util_cpu_caps.has_sse3 = 0;
   util_cpu_caps.has_ssse3 = 0;
   util_cpu_caps.has_sse4_1 = 0;
#endif
}
void rc_pair_schedule(struct radeon_compiler *cc, void *user)
{
	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
	struct schedule_state s;
	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
	unsigned int * opt = user;

	memset(&s, 0, sizeof(s));
	s.Opt = *opt;
	s.C = &c->Base;
	if (s.C->is_r500) {
		s.CalcScore = calc_score_readers;
	} else {
		s.CalcScore = calc_score_r300;
	}
	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
	while(inst != &c->Base.Program.Instructions) {
		struct rc_instruction * first;

		if (is_controlflow(inst)) {
			inst = inst->Next;
			continue;
		}

		first = inst;

		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
			inst = inst->Next;

		DBG("Schedule one block\n");
		memset(s.Temporary, 0, sizeof(s.Temporary));
		s.TEXCount = 0;
		schedule_block(&s, first, inst);
		if (s.PendingTEX) {
			s.PrevBlockHasTex = 1;
		}
	}
}
Exemple #7
0
PUBLIC
Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
                      short srcx, short srcy, unsigned short srcw, unsigned short srch,
                      short destx, short desty, unsigned short destw, unsigned short desth,
                      int flags)
{
   static int dump_window = -1;

   struct pipe_context *pipe;
   struct vl_compositor *compositor;
   struct vl_compositor_state *cstate;
   struct vl_screen *vscreen;

   XvMCSurfacePrivate *surface_priv;
   XvMCContextPrivate *context_priv;
   XvMCSubpicturePrivate *subpicture_priv;
   XvMCContext *context;
   struct u_rect src_rect = {srcx, srcx + srcw, srcy, srcy + srch};
   struct u_rect dst_rect = {destx, destx + destw, desty, desty + desth};

   struct pipe_resource *tex;
   struct pipe_surface surf_templ, *surf;
   struct u_rect *dirty_area;

   XVMC_MSG(XVMC_TRACE, "[XvMC] Displaying surface %p.\n", surface);

   assert(dpy);

   if (!surface || !surface->privData)
      return XvMCBadSurface;

   surface_priv = surface->privData;
   context = surface_priv->context;
   context_priv = context->privData;

   assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
   assert(srcx + srcw - 1 < surface->width);
   assert(srcy + srch - 1 < surface->height);

   subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
   pipe = context_priv->pipe;
   compositor = &context_priv->compositor;
   cstate = &context_priv->cstate;
   vscreen = context_priv->vscreen;

   tex = vscreen->texture_from_drawable(vscreen, (void *)drawable);
   dirty_area = vscreen->get_dirty_area(vscreen);

   memset(&surf_templ, 0, sizeof(surf_templ));
   surf_templ.format = tex->format;
   surf = pipe->create_surface(pipe, tex, &surf_templ);

   if (!surf)
      return BadDrawable;

   /*
    * Some apps (mplayer) hit these asserts because they call
    * this function after the window has been resized by the WM
    * but before they've handled the corresponding XEvent and
    * know about the new dimensions. The output should be clipped
    * until the app updates destw and desth.
    */
   /*
   assert(destx + destw - 1 < drawable_surface->width);
   assert(desty + desth - 1 < drawable_surface->height);
    */

   RecursiveEndFrame(surface_priv);

   context_priv->decoder->flush(context_priv->decoder);

   vl_compositor_clear_layers(cstate);
   vl_compositor_set_buffer_layer(cstate, compositor, 0, surface_priv->video_buffer,
                                  &src_rect, NULL, VL_COMPOSITOR_WEAVE);

   if (subpicture_priv) {
      XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);

      assert(subpicture_priv->surface == surface);

      if (subpicture_priv->palette)
         vl_compositor_set_palette_layer(cstate, compositor, 1, subpicture_priv->sampler, subpicture_priv->palette,
                                         &subpicture_priv->src_rect, &subpicture_priv->dst_rect, true);
      else
         vl_compositor_set_rgba_layer(cstate, compositor, 1, subpicture_priv->sampler,
                                      &subpicture_priv->src_rect, &subpicture_priv->dst_rect, NULL);

      surface_priv->subpicture = NULL;
      subpicture_priv->surface = NULL;
   }

   // Workaround for r600g, there seems to be a bug in the fence refcounting code
   pipe->screen->fence_reference(pipe->screen, &surface_priv->fence, NULL);

   vl_compositor_set_layer_dst_area(cstate, 0, &dst_rect);
   vl_compositor_set_layer_dst_area(cstate, 1, &dst_rect);
   vl_compositor_render(cstate, compositor, surf, dirty_area, true);

   pipe->flush(pipe, &surface_priv->fence, 0);

   XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);

   pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0,
                                   vscreen->get_private(vscreen), NULL);

   if(dump_window == -1) {
      dump_window = debug_get_num_option("XVMC_DUMP", 0);
   }

   if(dump_window) {
      static unsigned int framenum = 0;
      char cmd[256];

      sprintf(cmd, "xwd -id %d -out xvmc_frame_%08d.xwd", (int)drawable, ++framenum);
      if (system(cmd) != 0)
         XVMC_MSG(XVMC_ERR, "[XvMC] Dumping surface %p failed.\n", surface);
   }

   XVMC_MSG(XVMC_TRACE, "[XvMC] Pushed surface %p to front buffer.\n", surface);

   return Success;
}
Exemple #8
0
struct pipe_screen *
ddebug_screen_create(struct pipe_screen *screen)
{
   struct dd_screen *dscreen;
   const char *option = debug_get_option("GALLIUM_DDEBUG", NULL);
   bool dump_always = option && !strcmp(option, "always");
   bool no_flush = option && strstr(option, "noflush");
   bool help = option && !strcmp(option, "help");
   unsigned timeout = 0;

   if (help) {
      puts("Gallium driver debugger");
      puts("");
      puts("Usage:");
      puts("");
      puts("  GALLIUM_DDEBUG=always");
      puts("    Dump context and driver information after every draw call into");
      puts("    $HOME/"DD_DIR"/.");
      puts("");
      puts("  GALLIUM_DDEBUG=[timeout in ms] noflush");
      puts("    Flush and detect a device hang after every draw call based on the given");
      puts("    fence timeout and dump context and driver information into");
      puts("    $HOME/"DD_DIR"/ when a hang is detected.");
      puts("    If 'noflush' is specified, only detect hangs in pipe->flush.");
      puts("");
      puts("  GALLIUM_DDEBUG_SKIP=[count]");
      puts("    Skip flush and hang detection for the given initial number of draw calls.");
      puts("");
      exit(0);
   }

   if (!option)
      return screen;
   if (!dump_always && sscanf(option, "%u", &timeout) != 1)
      return screen;

   dscreen = CALLOC_STRUCT(dd_screen);
   if (!dscreen)
      return NULL;

#define SCR_INIT(_member) \
   dscreen->base._member = screen->_member ? dd_screen_##_member : NULL

   dscreen->base.destroy = dd_screen_destroy;
   dscreen->base.get_name = dd_screen_get_name;
   dscreen->base.get_vendor = dd_screen_get_vendor;
   dscreen->base.get_device_vendor = dd_screen_get_device_vendor;
   dscreen->base.get_param = dd_screen_get_param;
   dscreen->base.get_paramf = dd_screen_get_paramf;
   dscreen->base.get_shader_param = dd_screen_get_shader_param;
   /* get_video_param */
   /* get_compute_param */
   SCR_INIT(get_timestamp);
   dscreen->base.context_create = dd_screen_context_create;
   dscreen->base.is_format_supported = dd_screen_is_format_supported;
   /* is_video_format_supported */
   SCR_INIT(can_create_resource);
   dscreen->base.resource_create = dd_screen_resource_create;
   dscreen->base.resource_from_handle = dd_screen_resource_from_handle;
   SCR_INIT(resource_from_user_memory);
   dscreen->base.resource_get_handle = dd_screen_resource_get_handle;
   dscreen->base.resource_destroy = dd_screen_resource_destroy;
   SCR_INIT(flush_frontbuffer);
   SCR_INIT(fence_reference);
   SCR_INIT(fence_finish);
   SCR_INIT(get_driver_query_info);
   SCR_INIT(get_driver_query_group_info);

#undef SCR_INIT

   dscreen->screen = screen;
   dscreen->timeout_ms = timeout;
   dscreen->mode = dump_always ? DD_DUMP_ALL_CALLS : DD_DETECT_HANGS;
   dscreen->no_flush = no_flush;

   switch (dscreen->mode) {
   case DD_DUMP_ALL_CALLS:
      fprintf(stderr, "Gallium debugger active. Logging all calls.\n");
      break;
   case DD_DETECT_HANGS:
      fprintf(stderr, "Gallium debugger active. "
              "The hang detection timout is %i ms.\n", timeout);
      break;
   default:
      assert(0);
   }

   dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0);
   if (dscreen->skip_count > 0) {
      fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n",
              dscreen->skip_count);
   }

   return &dscreen->base;
}
Exemple #9
0
struct pipe_screen *
ddebug_screen_create(struct pipe_screen *screen)
{
   struct dd_screen *dscreen;
   const char *option;
   bool flush = false;
   bool verbose = false;
   bool transfers = false;
   unsigned timeout = 1000;
   unsigned apitrace_dump_call = 0;
   enum dd_dump_mode mode = DD_DUMP_ONLY_HANGS;

   option = debug_get_option("GALLIUM_DDEBUG", NULL);
   if (!option)
      return screen;

   if (!strcmp(option, "help")) {
      puts("Gallium driver debugger");
      puts("");
      puts("Usage:");
      puts("");
      puts("  GALLIUM_DDEBUG=\"[<timeout in ms>] [(always|apitrace <call#)] [flush] [transfers] [verbose]\"");
      puts("  GALLIUM_DDEBUG_SKIP=[count]");
      puts("");
      puts("Dump context and driver information of draw calls into");
      puts("$HOME/"DD_DIR"/. By default, watch for GPU hangs and only dump information");
      puts("about draw calls related to the hang.");
      puts("");
      puts("<timeout in ms>");
      puts("  Change the default timeout for GPU hang detection (default=1000ms).");
      puts("  Setting this to 0 will disable GPU hang detection entirely.");
      puts("");
      puts("always");
      puts("  Dump information about all draw calls.");
      puts("");
      puts("transfers");
      puts("  Also dump and do hang detection on transfers.");
      puts("");
      puts("apitrace <call#>");
      puts("  Dump information about the draw call corresponding to the given");
      puts("  apitrace call number and exit.");
      puts("");
      puts("flush");
      puts("  Flush after every draw call.");
      puts("");
      puts("verbose");
      puts("  Write additional information to stderr.");
      puts("");
      puts("GALLIUM_DDEBUG_SKIP=count");
      puts("  Skip dumping on the first count draw calls (only relevant with 'always').");
      puts("");
      exit(0);
   }

   for (;;) {
      skip_space(&option);
      if (!*option)
         break;

      if (match_word(&option, "always")) {
         if (mode == DD_DUMP_APITRACE_CALL) {
            printf("ddebug: both 'always' and 'apitrace' specified\n");
            exit(1);
         }

         mode = DD_DUMP_ALL_CALLS;
      } else if (match_word(&option, "flush")) {
         flush = true;
      } else if (match_word(&option, "transfers")) {
         transfers = true;
      } else if (match_word(&option, "verbose")) {
         verbose = true;
      } else if (match_word(&option, "apitrace")) {
         if (mode != DD_DUMP_ONLY_HANGS) {
            printf("ddebug: 'apitrace' can only appear once and not mixed with 'always'\n");
            exit(1);
         }

         if (!match_uint(&option, &apitrace_dump_call)) {
            printf("ddebug: expected call number after 'apitrace'\n");
            exit(1);
         }

         mode = DD_DUMP_APITRACE_CALL;
      } else if (match_uint(&option, &timeout)) {
         /* no-op */
      } else {
         printf("ddebug: bad options: %s\n", option);
         exit(1);
      }
   }

   dscreen = CALLOC_STRUCT(dd_screen);
   if (!dscreen)
      return NULL;

#define SCR_INIT(_member) \
   dscreen->base._member = screen->_member ? dd_screen_##_member : NULL

   dscreen->base.destroy = dd_screen_destroy;
   dscreen->base.get_name = dd_screen_get_name;
   dscreen->base.get_vendor = dd_screen_get_vendor;
   dscreen->base.get_device_vendor = dd_screen_get_device_vendor;
   SCR_INIT(get_disk_shader_cache);
   dscreen->base.get_param = dd_screen_get_param;
   dscreen->base.get_paramf = dd_screen_get_paramf;
   dscreen->base.get_compute_param = dd_screen_get_compute_param;
   dscreen->base.get_shader_param = dd_screen_get_shader_param;
   dscreen->base.query_memory_info = dd_screen_query_memory_info;
   /* get_video_param */
   /* get_compute_param */
   SCR_INIT(get_timestamp);
   dscreen->base.context_create = dd_screen_context_create;
   dscreen->base.is_format_supported = dd_screen_is_format_supported;
   /* is_video_format_supported */
   SCR_INIT(can_create_resource);
   dscreen->base.resource_create = dd_screen_resource_create;
   dscreen->base.resource_from_handle = dd_screen_resource_from_handle;
   SCR_INIT(resource_from_memobj);
   SCR_INIT(resource_from_user_memory);
   SCR_INIT(check_resource_capability);
   dscreen->base.resource_get_handle = dd_screen_resource_get_handle;
   SCR_INIT(resource_changed);
   dscreen->base.resource_destroy = dd_screen_resource_destroy;
   SCR_INIT(flush_frontbuffer);
   SCR_INIT(fence_reference);
   SCR_INIT(fence_finish);
   SCR_INIT(memobj_create_from_handle);
   SCR_INIT(memobj_destroy);
   SCR_INIT(get_driver_query_info);
   SCR_INIT(get_driver_query_group_info);
   SCR_INIT(get_compiler_options);
   SCR_INIT(get_driver_uuid);
   SCR_INIT(get_device_uuid);

#undef SCR_INIT

   dscreen->screen = screen;
   dscreen->timeout_ms = timeout;
   dscreen->dump_mode = mode;
   dscreen->flush_always = flush;
   dscreen->transfers = transfers;
   dscreen->verbose = verbose;
   dscreen->apitrace_dump_call = apitrace_dump_call;

   switch (dscreen->dump_mode) {
   case DD_DUMP_ALL_CALLS:
      fprintf(stderr, "Gallium debugger active. Logging all calls.\n");
      break;
   case DD_DUMP_APITRACE_CALL:
      fprintf(stderr, "Gallium debugger active. Going to dump an apitrace call.\n");
      break;
   default:
      fprintf(stderr, "Gallium debugger active.\n");
      break;
   }

   if (dscreen->timeout_ms > 0)
      fprintf(stderr, "Hang detection timeout is %ums.\n", dscreen->timeout_ms);
   else
      fprintf(stderr, "Hang detection is disabled.\n");

   dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0);
   if (dscreen->skip_count > 0) {
      fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n",
              dscreen->skip_count);
   }

   return &dscreen->base;
}
Exemple #10
0
/**
 * Create a new pipe_screen object
 * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
 */
struct pipe_screen *
llvmpipe_create_screen(struct sw_winsys *winsys)
{
   struct llvmpipe_screen *screen;

   util_cpu_detect();

#if defined(PIPE_ARCH_X86) && HAVE_LLVM < 0x0302
   /* require SSE2 due to LLVM PR6960. */
   if (!util_cpu_caps.has_sse2)
       return NULL;
#endif

#ifdef DEBUG
   LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
#endif

   LP_PERF = debug_get_flags_option("LP_PERF", lp_perf_flags, 0 );

   screen = CALLOC_STRUCT(llvmpipe_screen);
   if (!screen)
      return NULL;

   screen->winsys = winsys;

   screen->base.destroy = llvmpipe_destroy_screen;

   screen->base.get_name = llvmpipe_get_name;
   screen->base.get_vendor = llvmpipe_get_vendor;
   screen->base.get_param = llvmpipe_get_param;
   screen->base.get_shader_param = llvmpipe_get_shader_param;
   screen->base.get_paramf = llvmpipe_get_paramf;
   screen->base.is_format_supported = llvmpipe_is_format_supported;

   screen->base.context_create = llvmpipe_create_context;
   screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
   screen->base.fence_reference = llvmpipe_fence_reference;
   screen->base.fence_signalled = llvmpipe_fence_signalled;
   screen->base.fence_finish = llvmpipe_fence_finish;

   screen->base.get_timestamp = llvmpipe_get_timestamp;

   llvmpipe_init_screen_resource_funcs(&screen->base);

   lp_jit_screen_init(screen);

   screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
#ifdef PIPE_SUBSYSTEM_EMBEDDED
   screen->num_threads = 0;
#endif
   screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
   screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);

   screen->rast = lp_rast_create(screen->num_threads);
   if (!screen->rast) {
      lp_jit_screen_cleanup(screen);
      FREE(screen);
      return NULL;
   }
   pipe_mutex_init(screen->rast_mutex);

   util_format_s3tc_init();

   return &screen->base;
}
Exemple #11
0
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
					   const struct pipe_screen_config *config)
{
	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
	unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;

	if (!sscreen) {
		return NULL;
	}

	sscreen->ws = ws;
	ws->query_info(ws, &sscreen->info);

	if (sscreen->info.chip_class >= GFX9) {
		sscreen->se_tile_repeat = 32 * sscreen->info.max_se;
	} else {
		ac_get_raster_config(&sscreen->info,
				     &sscreen->pa_sc_raster_config,
				     &sscreen->pa_sc_raster_config_1,
				     &sscreen->se_tile_repeat);
	}

	sscreen->debug_flags = debug_get_flags_option("R600_DEBUG",
						      debug_options, 0);
	sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG",
						       debug_options, 0);

	/* Set functions first. */
	sscreen->b.context_create = si_pipe_create_context;
	sscreen->b.destroy = si_destroy_screen;
	sscreen->b.set_max_shader_compiler_threads =
		si_set_max_shader_compiler_threads;
	sscreen->b.is_parallel_shader_compilation_finished =
		si_is_parallel_shader_compilation_finished;

	si_init_screen_get_functions(sscreen);
	si_init_screen_buffer_functions(sscreen);
	si_init_screen_fence_functions(sscreen);
	si_init_screen_state_functions(sscreen);
	si_init_screen_texture_functions(sscreen);
	si_init_screen_query_functions(sscreen);

	/* Set these flags in debug_flags early, so that the shader cache takes
	 * them into account.
	 */
	if (driQueryOptionb(config->options,
			    "glsl_correct_derivatives_after_discard"))
		sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL);
	if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
		sscreen->debug_flags |= DBG(SI_SCHED);

	if (sscreen->debug_flags & DBG(INFO))
		ac_print_gpu_info(&sscreen->info);

	slab_create_parent(&sscreen->pool_transfers,
			   sizeof(struct si_transfer), 64);

	sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
	if (sscreen->force_aniso >= 0) {
		printf("radeonsi: Forcing anisotropy filter to %ix\n",
		       /* round down to a power of two */
		       1 << util_logbase2(sscreen->force_aniso));
	}

	(void) mtx_init(&sscreen->aux_context_lock, mtx_plain);
	(void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain);

	si_init_gs_info(sscreen);
	if (!si_init_shader_cache(sscreen)) {
		FREE(sscreen);
		return NULL;
	}

	si_disk_cache_create(sscreen);

	/* Determine the number of shader compiler threads. */
	hw_threads = sysconf(_SC_NPROCESSORS_ONLN);

	if (hw_threads >= 12) {
		num_comp_hi_threads = hw_threads * 3 / 4;
		num_comp_lo_threads = hw_threads / 3;
	} else if (hw_threads >= 6) {
		num_comp_hi_threads = hw_threads - 2;
		num_comp_lo_threads = hw_threads / 2;
	} else if (hw_threads >= 2) {
		num_comp_hi_threads = hw_threads - 1;
		num_comp_lo_threads = hw_threads / 2;
	} else {
		num_comp_hi_threads = 1;
		num_comp_lo_threads = 1;
	}

	num_comp_hi_threads = MIN2(num_comp_hi_threads,
				   ARRAY_SIZE(sscreen->compiler));
	num_comp_lo_threads = MIN2(num_comp_lo_threads,
				   ARRAY_SIZE(sscreen->compiler_lowp));

	if (!util_queue_init(&sscreen->shader_compiler_queue, "sh",
			     64, num_comp_hi_threads,
			     UTIL_QUEUE_INIT_RESIZE_IF_FULL |
			     UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) {
		si_destroy_shader_cache(sscreen);
		FREE(sscreen);
		return NULL;
	}

	if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
			     "shlo",
			     64, num_comp_lo_threads,
			     UTIL_QUEUE_INIT_RESIZE_IF_FULL |
			     UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY |
			     UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
	       si_destroy_shader_cache(sscreen);
	       FREE(sscreen);
	       return NULL;
	}

	if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
		si_init_perfcounters(sscreen);

	/* Determine tessellation ring info. */
	bool double_offchip_buffers = sscreen->info.chip_class >= CIK &&
				      sscreen->info.family != CHIP_CARRIZO &&
				      sscreen->info.family != CHIP_STONEY;
	/* This must be one less than the maximum number due to a hw limitation.
	 * Various hardware bugs in SI, CIK, and GFX9 need this.
	 */
	unsigned max_offchip_buffers_per_se;

	/* Only certain chips can use the maximum value. */
	if (sscreen->info.family == CHIP_VEGA12 ||
	    sscreen->info.family == CHIP_VEGA20)
		max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	else
		max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;

	unsigned max_offchip_buffers = max_offchip_buffers_per_se *
				       sscreen->info.max_se;
	unsigned offchip_granularity;

	/* Hawaii has a bug with offchip buffers > 256 that can be worked
	 * around by setting 4K granularity.
	 */
	if (sscreen->info.family == CHIP_HAWAII) {
		sscreen->tess_offchip_block_dw_size = 4096;
		offchip_granularity = V_03093C_X_4K_DWORDS;
	} else {
		sscreen->tess_offchip_block_dw_size = 8192;
		offchip_granularity = V_03093C_X_8K_DWORDS;
	}

	sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
	assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0);
	sscreen->tess_offchip_ring_size = max_offchip_buffers *
					  sscreen->tess_offchip_block_dw_size * 4;

	if (sscreen->info.chip_class >= CIK) {
		if (sscreen->info.chip_class >= VI)
			--max_offchip_buffers;
		sscreen->vgt_hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		assert(offchip_granularity == V_03093C_X_8K_DWORDS);
		sscreen->vgt_hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}

	/* The mere presense of CLEAR_STATE in the IB causes random GPU hangs
        * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
        * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/
       sscreen->has_clear_state = sscreen->info.chip_class >= CIK &&
                                  sscreen->info.drm_major == 3;

	sscreen->has_distributed_tess =
		sscreen->info.chip_class >= VI &&
		sscreen->info.max_se >= 2;

	sscreen->has_draw_indirect_multi =
		(sscreen->info.family >= CHIP_POLARIS10) ||
		(sscreen->info.chip_class == VI &&
		 sscreen->info.pfp_fw_version >= 121 &&
		 sscreen->info.me_fw_version >= 87) ||
		(sscreen->info.chip_class == CIK &&
		 sscreen->info.pfp_fw_version >= 211 &&
		 sscreen->info.me_fw_version >= 173) ||
		(sscreen->info.chip_class == SI &&
		 sscreen->info.pfp_fw_version >= 79 &&
		 sscreen->info.me_fw_version >= 142);

	sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI &&
					 sscreen->info.max_se >= 2 &&
					 !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
	sscreen->assume_no_z_fights =
		driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
	sscreen->commutative_blend_add =
		driQueryOptionb(config->options, "radeonsi_commutative_blend_add");

	{
#define OPT_BOOL(name, dflt, description) \
		sscreen->options.name = \
			driQueryOptionb(config->options, "radeonsi_"#name);
#include "si_debug_options.h"
	}

	sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
					sscreen->info.family == CHIP_RAVEN;
	sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
					    sscreen->info.family <= CHIP_POLARIS12) ||
					   sscreen->info.family == CHIP_VEGA10 ||
					   sscreen->info.family == CHIP_RAVEN;
	sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
					sscreen->info.family == CHIP_RAVEN;
	sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2;

	/* Only enable primitive binning on APUs by default. */
	sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN ||
				sscreen->info.family == CHIP_RAVEN2;

	sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN ||
				sscreen->info.family == CHIP_RAVEN2;

	/* Process DPBB enable flags. */
	if (sscreen->debug_flags & DBG(DPBB)) {
		sscreen->dpbb_allowed = true;
		if (sscreen->debug_flags & DBG(DFSM))
			sscreen->dfsm_allowed = true;
	}

	/* Process DPBB disable flags. */
	if (sscreen->debug_flags & DBG(NO_DPBB)) {
		sscreen->dpbb_allowed = false;
		sscreen->dfsm_allowed = false;
	} else if (sscreen->debug_flags & DBG(NO_DFSM)) {
		sscreen->dfsm_allowed = false;
	}

	/* While it would be nice not to have this flag, we are constrained
	 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
	 * on GFX9.
	 */
	sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI;

	/* Some chips have RB+ registers, but don't support RB+. Those must
	 * always disable it.
	 */
	if (sscreen->info.family == CHIP_STONEY ||
	    sscreen->info.chip_class >= GFX9) {
		sscreen->has_rbplus = true;

		sscreen->rbplus_allowed =
			!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
			(sscreen->info.family == CHIP_STONEY ||
			 sscreen->info.family == CHIP_VEGA12 ||
			 sscreen->info.family == CHIP_RAVEN ||
			 sscreen->info.family == CHIP_RAVEN2);
	}

	sscreen->dcc_msaa_allowed =
		!(sscreen->debug_flags & DBG(NO_DCC_MSAA));

	sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;

	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
	sscreen->use_monolithic_shaders =
		(sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;

	sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
					    SI_CONTEXT_INV_VMEM_L1;
	if (sscreen->info.chip_class <= VI) {
		sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
		sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
	}

	if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
		sscreen->debug_flags |= DBG_ALL_SHADERS;

	/* Syntax:
	 *     EQAA=s,z,c
	 * Example:
	 *     EQAA=8,4,2

	 * That means 8 coverage samples, 4 Z/S samples, and 2 color samples.
	 * Constraints:
	 *     s >= z >= c (ignoring this only wastes memory)
	 *     s = [2..16]
	 *     z = [2..8]
	 *     c = [2..8]
	 *
	 * Only MSAA color and depth buffers are overriden.
	 */
	if (sscreen->info.has_eqaa_surface_allocator) {
		const char *eqaa = debug_get_option("EQAA", NULL);
		unsigned s,z,f;

		if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s && z && f) {
			sscreen->eqaa_force_coverage_samples = s;
			sscreen->eqaa_force_z_samples = z;
			sscreen->eqaa_force_color_samples = f;
		}
	}

	for (i = 0; i < num_comp_hi_threads; i++)
		si_init_compiler(sscreen, &sscreen->compiler[i]);
	for (i = 0; i < num_comp_lo_threads; i++)
		si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);

	/* Create the auxiliary context. This must be done last. */
	sscreen->aux_context = si_create_context(
		&sscreen->b, sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0);
	if (sscreen->options.aux_debug) {
		struct u_log_context *log = CALLOC_STRUCT(u_log_context);
		u_log_context_init(log);
		sscreen->aux_context->set_log_context(sscreen->aux_context, log);
	}

	if (sscreen->debug_flags & DBG(TEST_DMA))
		si_test_dma(sscreen);

	if (sscreen->debug_flags & DBG(TEST_DMA_PERF)) {
		si_test_dma_perf(sscreen);
	}

	if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
				      DBG(TEST_VMFAULT_SDMA) |
				      DBG(TEST_VMFAULT_SHADER)))
		si_test_vmfault(sscreen);

	if (sscreen->debug_flags & DBG(TEST_GDS))
		si_test_gds((struct si_context*)sscreen->aux_context);

	if (sscreen->debug_flags & DBG(TEST_GDS_MM)) {
		si_test_gds_memory_management((struct si_context*)sscreen->aux_context,
					      32 * 1024, 4, RADEON_DOMAIN_GDS);
	}
	if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) {
		si_test_gds_memory_management((struct si_context*)sscreen->aux_context,
					      4, 1, RADEON_DOMAIN_OA);
	}

	return &sscreen->b;
}
Exemple #12
0
struct pipe_screen *
nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
    static const unsigned query_sizes[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32};
    struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen);
    struct nouveau_channel *chan;
    struct pipe_screen *pscreen;
    unsigned eng3d_class = 0;
    int ret, i;

    if (!screen)
        return NULL;

    pscreen = &screen->base.base;

    ret = nouveau_screen_init(&screen->base, dev);
    if (ret) {
        nvfx_screen_destroy(pscreen);
        return NULL;
    }
    chan = screen->base.channel;
    screen->cur_ctx = NULL;
    chan->user_private = screen;
    chan->flush_notify = nvfx_channel_flush_notify;

    pscreen->winsys = ws;
    pscreen->destroy = nvfx_screen_destroy;
    pscreen->get_param = nvfx_screen_get_param;
    pscreen->get_shader_param = nvfx_screen_get_shader_param;
    pscreen->get_paramf = nvfx_screen_get_paramf;
    pscreen->is_format_supported = nvfx_screen_is_format_supported;
    pscreen->context_create = nvfx_create;

    switch (dev->chipset & 0xf0) {
    case 0x30:
        if (NV30_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
            eng3d_class = NV30_3D;
        else if (NV34_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
            eng3d_class = NV34_3D;
        else if (NV35_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
            eng3d_class = NV35_3D;
        break;
    case 0x40:
        if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f)))
            eng3d_class = NV40_3D;
        else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
            eng3d_class = NV44_3D;
        screen->is_nv4x = ~0;
        break;
    case 0x60:
        if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
            eng3d_class = NV44_3D;
        screen->is_nv4x = ~0;
        break;
    }

    if (!eng3d_class) {
        NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset);
        return NULL;
    }

    screen->advertise_npot = !!screen->is_nv4x;
    screen->advertise_blend_equation_separate = !!screen->is_nv4x;
    screen->use_nv4x = screen->is_nv4x;

    if(screen->is_nv4x) {
        if(debug_get_bool_option("NVFX_SIMULATE_NV30", FALSE))
            screen->use_nv4x = 0;
        if(!debug_get_bool_option("NVFX_NPOT", TRUE))
            screen->advertise_npot = 0;
        if(!debug_get_bool_option("NVFX_BLEND_EQ_SEP", TRUE))
            screen->advertise_blend_equation_separate = 0;
    }

    screen->force_swtnl = debug_get_bool_option("NVFX_SWTNL", FALSE);
    screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);

    screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
    screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
    screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));

    /* We don't advertise these by default because filtering and blending doesn't work as
     * it should, due to several restrictions.
     * The only exception is fp16 on nv40.
     */
    screen->advertise_fp16 = debug_get_bool_option("NVFX_FP16", !!screen->use_nv4x);
    screen->advertise_fp32 = debug_get_bool_option("NVFX_FP32", 0);

    screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);

    /* surely both nv3x and nv44 support index buffers too: find out how and test that */
    if(eng3d_class == NV40_3D)
        screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags;

    if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags)
        screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = screen->vertex_buffer_reloc_flags;

    nvfx_screen_init_resource_functions(pscreen);

    ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d);
    if (ret) {
        NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
        return FALSE;
    }

    /* 2D engine setup */
    nvfx_screen_surface_init(pscreen);

    /* Notifier for sync purposes */
    ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
    if (ret) {
        NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
        nvfx_screen_destroy(pscreen);
        return NULL;
    }

    /* Query objects */
    for(i = 0; i < sizeof(query_sizes) / sizeof(query_sizes[0]); ++i)
    {
        ret = nouveau_notifier_alloc(chan, 0xbeef0302, query_sizes[i], &screen->query);
        if(!ret)
            break;
    }

    if (ret) {
        NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
        nvfx_screen_destroy(pscreen);
        return NULL;
    }

    ret = nouveau_resource_init(&screen->query_heap, 0, query_sizes[i]);
    if (ret) {
        NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
        nvfx_screen_destroy(pscreen);
        return NULL;
    }

    LIST_INITHEAD(&screen->query_list);

    /* Vtxprog resources */
    if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->use_nv4x ? 512 : 256) ||
            nouveau_resource_init(&screen->vp_data_heap, 0, screen->use_nv4x ? 468 : 256)) {
        nvfx_screen_destroy(pscreen);
        return NULL;
    }

    BIND_RING(chan, screen->eng3d, 7);

    /* Static eng3d initialisation */
    /* note that we just started using the channel, so we must have space in the pushbuffer */
    OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1));
    OUT_RING(chan, screen->sync->handle);
    OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2));
    OUT_RING(chan, chan->vram->handle);
    OUT_RING(chan, chan->gart->handle);
    OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1));
    OUT_RING(chan, chan->vram->handle);
    OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2));
    OUT_RING(chan, chan->vram->handle);
    OUT_RING(chan, chan->vram->handle);
    OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2));
    OUT_RING(chan, chan->vram->handle);
    OUT_RING(chan, chan->gart->handle);

    OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2));
    OUT_RING(chan, 0);
    OUT_RING(chan, screen->query->handle);

    OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2));
    OUT_RING(chan, chan->vram->handle);
    OUT_RING(chan, chan->vram->handle);

    if(!screen->is_nv4x)
        nv30_screen_init(screen);
    else
        nv40_screen_init(screen);

    return pscreen;
}
Exemple #13
0
struct pipe_screen *
nv30_screen_create(struct nouveau_device *dev)
{
   struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen);
   struct pipe_screen *pscreen;
   struct nouveau_pushbuf *push;
   struct nv04_fifo *fifo;
   unsigned oclass = 0;
   int ret, i;

   if (!screen)
      return NULL;

   switch (dev->chipset & 0xf0) {
   case 0x30:
      if (RANKINE_0397_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV30_3D_CLASS;
      else
      if (RANKINE_0697_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV34_3D_CLASS;
      else
      if (RANKINE_0497_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV35_3D_CLASS;
      break;
   case 0x40:
      if (CURIE_4097_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV40_3D_CLASS;
      else
      if (CURIE_4497_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV44_3D_CLASS;
      break;
   case 0x60:
      if (CURIE_4497_CHIPSET6X & (1 << (dev->chipset & 0x0f)))
         oclass = NV44_3D_CLASS;
      break;
   default:
      break;
   }

   if (!oclass) {
      NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset);
      FREE(screen);
      return NULL;
   }

   /*
    * Some modern apps try to use msaa without keeping in mind the
    * restrictions on videomem of older cards. Resulting in dmesg saying:
    * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
    * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
    * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
    *
    * Because we are running out of video memory, after which the program
    * using the msaa visual freezes, and eventually the entire system freezes.
    *
    * To work around this we do not allow msaa visauls by default and allow
    * the user to override this via NV30_MAX_MSAA.
    */
   screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
   if (screen->max_sample_count > 4)
      screen->max_sample_count = 4;

   pscreen = &screen->base.base;
   pscreen->destroy = nv30_screen_destroy;
   pscreen->get_param = nv30_screen_get_param;
   pscreen->get_paramf = nv30_screen_get_paramf;
   pscreen->get_shader_param = nv30_screen_get_shader_param;
   pscreen->context_create = nv30_context_create;
   pscreen->is_format_supported = nv30_screen_is_format_supported;
   nv30_resource_screen_init(pscreen);
   nouveau_screen_init_vdec(&screen->base);

   screen->base.fence.emit = nv30_screen_fence_emit;
   screen->base.fence.update = nv30_screen_fence_update;

   ret = nouveau_screen_init(&screen->base, dev);
   if (ret)
      FAIL_SCREEN_INIT("nv30_screen_init failed: %d\n", ret);

   screen->base.vidmem_bindings |= PIPE_BIND_VERTEX_BUFFER;
   screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER;
   if (oclass == NV40_3D_CLASS) {
      screen->base.vidmem_bindings |= PIPE_BIND_INDEX_BUFFER;
      screen->base.sysmem_bindings |= PIPE_BIND_INDEX_BUFFER;
   }

   fifo = screen->base.channel->data;
   push = screen->base.pushbuf;
   push->rsvd_kick = 16;

   ret = nouveau_object_new(screen->base.channel, 0x00000000, NV01_NULL_CLASS,
                            NULL, 0, &screen->null);
   if (ret)
      FAIL_SCREEN_INIT("error allocating null object: %d\n", ret);

   /* DMA_FENCE refuses to accept DMA objects with "adjust" filled in,
    * this means that the address pointed at by the DMA object must
    * be 4KiB aligned, which means this object needs to be the first
    * one allocated on the channel.
    */
   ret = nouveau_object_new(screen->base.channel, 0xbeef1e00,
                            NOUVEAU_NOTIFIER_CLASS, &(struct nv04_notify) {
                            .length = 32 }, sizeof(struct nv04_notify),
/**
 * Enter a surface into the presentation queue.
 */
VdpStatus
vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
                              VdpOutputSurface surface,
                              uint32_t clip_width,
                              uint32_t clip_height,
                              VdpTime  earliest_presentation_time)
{
   static int dump_window = -1;

   vlVdpPresentationQueue *pq;
   vlVdpOutputSurface *surf;

   struct pipe_context *pipe;
   struct pipe_resource *tex;
   struct pipe_surface surf_templ, *surf_draw;
   struct u_rect src_rect, dst_clip, *dirty_area;

   struct vl_compositor *compositor;
   struct vl_compositor_state *cstate;

   pq = vlGetDataHTAB(presentation_queue);
   if (!pq)
      return VDP_STATUS_INVALID_HANDLE;

   surf = vlGetDataHTAB(surface);
   if (!surf)
      return VDP_STATUS_INVALID_HANDLE;

   pipe = pq->device->context;
   compositor = &pq->device->compositor;
   cstate = &pq->cstate;

   pipe_mutex_lock(pq->device->mutex);
   tex = vl_screen_texture_from_drawable(pq->device->vscreen, pq->drawable);
   if (!tex) {
      pipe_mutex_unlock(pq->device->mutex);
      return VDP_STATUS_INVALID_HANDLE;
   }

   dirty_area = vl_screen_get_dirty_area(pq->device->vscreen);

   memset(&surf_templ, 0, sizeof(surf_templ));
   surf_templ.format = tex->format;
   surf_templ.usage = PIPE_BIND_RENDER_TARGET;
   surf_draw = pipe->create_surface(pipe, tex, &surf_templ);

   surf->timestamp = (vlVdpTime)earliest_presentation_time;

   dst_clip.x0 = 0;
   dst_clip.y0 = 0;
   dst_clip.x1 = clip_width ? clip_width : surf_draw->width;
   dst_clip.y1 = clip_height ? clip_height : surf_draw->height;

   if (pq->device->delayed_rendering.surface == surface &&
       dst_clip.x1 == surf_draw->width && dst_clip.y1 == surf_draw->height) {

      // TODO: we correctly support the clipping here, but not the pq background color in the clipped area....
      cstate = pq->device->delayed_rendering.cstate;
      vl_compositor_set_dst_clip(cstate, &dst_clip);
      vlVdpResolveDelayedRendering(pq->device, surf_draw, dirty_area);

   } else {
      vlVdpResolveDelayedRendering(pq->device, NULL, NULL);

      src_rect.x0 = 0;
      src_rect.y0 = 0;
      src_rect.x1 = surf_draw->width;
      src_rect.y1 = surf_draw->height;

      vl_compositor_clear_layers(cstate);
      vl_compositor_set_rgba_layer(cstate, compositor, 0, surf->sampler_view, &src_rect, NULL, NULL);
      vl_compositor_set_dst_clip(cstate, &dst_clip);
      vl_compositor_render(cstate, compositor, surf_draw, dirty_area);
   }

   vl_screen_set_next_timestamp(pq->device->vscreen, earliest_presentation_time);
   pipe->screen->flush_frontbuffer
   (
      pipe->screen, tex, 0, 0,
      vl_screen_get_private(pq->device->vscreen)
   );

   pipe->screen->fence_reference(pipe->screen, &surf->fence, NULL);
   pipe->flush(pipe, &surf->fence);

   if (dump_window == -1) {
      dump_window = debug_get_num_option("VDPAU_DUMP", 0);
   }

   if (dump_window) {
      static unsigned int framenum = 0;
      char cmd[256];

      if (framenum) {
         sprintf(cmd, "xwd -id %d -silent -out vdpau_frame_%08d.xwd", (int)pq->drawable, framenum);
         if (system(cmd) != 0)
            VDPAU_MSG(VDPAU_ERR, "[VDPAU] Dumping surface %d failed.\n", surface);
      }
      framenum++;
   }

   pipe_resource_reference(&tex, NULL);
   pipe_surface_reference(&surf_draw, NULL);
   pipe_mutex_unlock(pq->device->mutex);

   return VDP_STATUS_OK;
}
Exemple #15
0
bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
   struct nv50_ir_prog_info *info;
   int ret;
   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return false;

   info->type = prog->type;
   info->target = chipset;
   info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
   info->bin.source = (void *)prog->pipe.tokens;

   info->io.ucpCBSlot = 15;
   info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
   info->io.genUserClip = prog->vp.clpd_nr;
   info->io.sampleInterp = prog->fp.sample_interp;

   info->io.resInfoCBSlot = 15;
   info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
   info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
   info->io.msInfoCBSlot = 15;
   info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;

   info->assignSlots = nv50_program_assign_varying_slots;

   prog->vp.bfc[0] = 0xff;
   prog->vp.bfc[1] = 0xff;
   prog->vp.edgeflag = 0xff;
   prog->vp.clpd[0] = map_undef;
   prog->vp.clpd[1] = map_undef;
   prog->vp.psiz = map_undef;
   prog->gp.has_layer = 0;
   prog->gp.has_viewport = 0;

   info->driverPriv = prog;

#ifdef DEBUG
   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
#else
   info->optLevel = 3;
#endif

   ret = nv50_ir_generate_code(info);
   if (ret) {
      NOUVEAU_ERR("shader translation failed: %i\n", ret);
      goto out;
   }
   FREE(info->bin.syms);

   prog->code = info->bin.code;
   prog->code_size = info->bin.codeSize;
   prog->fixups = info->bin.relocData;
   prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
   prog->tls_space = info->bin.tlsSpace;

   if (prog->type == PIPE_SHADER_FRAGMENT) {
      if (info->prop.fp.writesDepth) {
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
         prog->fp.flags[1] = 0x11;
      }
      if (info->prop.fp.usesDiscard)
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
   } else
   if (prog->type == PIPE_SHADER_GEOMETRY) {
      switch (info->prop.gp.outputPrim) {
      case PIPE_PRIM_LINE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
         break;
      case PIPE_PRIM_POINTS:
      default:
         assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS);
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
         break;
      }
      prog->gp.vert_count = info->prop.gp.maxVertices;
   }

   if (prog->pipe.stream_output.num_outputs)
      prog->so = nv50_program_create_strmout_state(info,
                                                   &prog->pipe.stream_output);

out:
   FREE(info);
   return !ret;
}
bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
                       struct pipe_debug_callback *debug)
{
   struct nv50_ir_prog_info *info;
   int i, ret;
   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return false;

   info->type = prog->type;
   info->target = chipset;

   info->bin.sourceRep = prog->pipe.type;
   switch (prog->pipe.type) {
   case PIPE_SHADER_IR_TGSI:
      info->bin.source = (void *)prog->pipe.tokens;
      break;
   case PIPE_SHADER_IR_NIR:
      info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir);
      break;
   default:
      assert(!"unsupported IR!");
      return false;
   }

   info->bin.smemSize = prog->cp.smem_size;
   info->io.auxCBSlot = 15;
   info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
   info->io.genUserClip = prog->vp.clpd_nr;
   if (prog->fp.alphatest)
      info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET;

   info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
   info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
   info->io.msInfoCBSlot = 15;
   info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;

   info->assignSlots = nv50_program_assign_varying_slots;

   prog->vp.bfc[0] = 0xff;
   prog->vp.bfc[1] = 0xff;
   prog->vp.edgeflag = 0xff;
   prog->vp.clpd[0] = map_undef;
   prog->vp.clpd[1] = map_undef;
   prog->vp.psiz = map_undef;
   prog->gp.has_layer = 0;
   prog->gp.has_viewport = 0;

   if (prog->type == PIPE_SHADER_COMPUTE)
      info->prop.cp.inputOffset = 0x10;

   info->driverPriv = prog;

#ifdef DEBUG
   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
   info->omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0);
#else
   info->optLevel = 3;
#endif

   ret = nv50_ir_generate_code(info);
   if (ret) {
      NOUVEAU_ERR("shader translation failed: %i\n", ret);
      goto out;
   }

   prog->code = info->bin.code;
   prog->code_size = info->bin.codeSize;
   prog->fixups = info->bin.relocData;
   prog->interps = info->bin.fixupData;
   prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
   prog->tls_space = info->bin.tlsSpace;
   prog->cp.smem_size = info->bin.smemSize;
   prog->mul_zero_wins = info->io.mul_zero_wins;
   prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;

   prog->vp.clip_enable = (1 << info->io.clipDistances) - 1;
   prog->vp.cull_enable =
      ((1 << info->io.cullDistances) - 1) << info->io.clipDistances;
   prog->vp.clip_mode = 0;
   for (i = 0; i < info->io.cullDistances; ++i)
      prog->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);

   if (prog->type == PIPE_SHADER_FRAGMENT) {
      if (info->prop.fp.writesDepth) {
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
         prog->fp.flags[1] = 0x11;
      }
      if (info->prop.fp.usesDiscard)
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
   } else
   if (prog->type == PIPE_SHADER_GEOMETRY) {
      switch (info->prop.gp.outputPrim) {
      case PIPE_PRIM_LINE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
         break;
      case PIPE_PRIM_POINTS:
      default:
         assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS);
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
         break;
      }
      prog->gp.vert_count = CLAMP(info->prop.gp.maxVertices, 1, 1024);
   }

   if (prog->type == PIPE_SHADER_COMPUTE) {
      prog->cp.syms = info->bin.syms;
      prog->cp.num_syms = info->bin.numSyms;
   } else {
      FREE(info->bin.syms);
   }

   if (prog->pipe.stream_output.num_outputs)
      prog->so = nv50_program_create_strmout_state(info,
                                                   &prog->pipe.stream_output);

   pipe_debug_message(debug, SHADER_INFO,
                      "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d",
                      prog->type, info->bin.tlsSpace, info->bin.smemSize,
                      prog->max_gpr, info->bin.instructions,
                      info->bin.codeSize);

out:
   if (info->bin.sourceRep == PIPE_SHADER_IR_NIR)
      ralloc_free((void *)info->bin.source);
   FREE(info);
   return !ret;
}
Exemple #17
0
bool r600_common_screen_init(struct r600_common_screen *rscreen,
			     struct radeon_winsys *ws)
{
	char llvm_string[32] = {}, kernel_version[128] = {};
	struct utsname uname_data;

	ws->query_info(ws, &rscreen->info);

	if (uname(&uname_data) == 0)
		snprintf(kernel_version, sizeof(kernel_version),
			 " / %s", uname_data.release);

#if HAVE_LLVM
	snprintf(llvm_string, sizeof(llvm_string),
		 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
		 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
#endif

	snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
		 "%s (DRM %i.%i.%i%s%s)",
		 r600_get_chip_name(rscreen), rscreen->info.drm_major,
		 rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
		 kernel_version, llvm_string);

	rscreen->b.get_name = r600_get_name;
	rscreen->b.get_vendor = r600_get_vendor;
	rscreen->b.get_device_vendor = r600_get_device_vendor;
	rscreen->b.get_compute_param = r600_get_compute_param;
	rscreen->b.get_paramf = r600_get_paramf;
	rscreen->b.get_timestamp = r600_get_timestamp;
	rscreen->b.fence_finish = r600_fence_finish;
	rscreen->b.fence_reference = r600_fence_reference;
	rscreen->b.resource_destroy = u_resource_destroy_vtbl;
	rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
	rscreen->b.query_memory_info = r600_query_memory_info;

	if (rscreen->info.has_uvd) {
		rscreen->b.get_video_param = rvid_get_video_param;
		rscreen->b.is_video_format_supported = rvid_is_format_supported;
	} else {
		rscreen->b.get_video_param = r600_get_video_param;
		rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
	}

	r600_init_screen_texture_functions(rscreen);
	r600_init_screen_query_functions(rscreen);

	rscreen->ws = ws;
	rscreen->family = rscreen->info.family;
	rscreen->chip_class = rscreen->info.chip_class;
	rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);

	rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
	if (rscreen->force_aniso >= 0) {
		printf("radeon: Forcing anisotropy filter to %ix\n",
		       /* round down to a power of two */
		       1 << util_logbase2(rscreen->force_aniso));
	}

	util_format_s3tc_init();
	pipe_mutex_init(rscreen->aux_context_lock);
	pipe_mutex_init(rscreen->gpu_load_mutex);

	if (rscreen->debug_flags & DBG_INFO) {
		printf("pci_id = 0x%x\n", rscreen->info.pci_id);
		printf("family = %i (%s)\n", rscreen->info.family,
		       r600_get_chip_name(rscreen));
		printf("chip_class = %i\n", rscreen->info.chip_class);
		printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
		printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
		printf("max_alloc_size = %i MB\n",
		       (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
		printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
		printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
		printf("has_sdma = %i\n", rscreen->info.has_sdma);
		printf("has_uvd = %i\n", rscreen->info.has_uvd);
		printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
		printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
		printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
		printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
		       rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
		printf("has_userptr = %i\n", rscreen->info.has_userptr);

		printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
		printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
		printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
		printf("max_se = %i\n", rscreen->info.max_se);
		printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);

		printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
		printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
		printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
		printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
		printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
		printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
	}
	return true;
}
boolean
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
   struct nv50_ir_prog_info *info;
   int ret;
   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return FALSE;

   info->type = prog->type;
   info->target = chipset;
   info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
   info->bin.source = (void *)prog->pipe.tokens;

   info->io.ucpCBSlot = 15;
   info->io.ucpBase = 0;
   info->io.genUserClip = prog->vp.clpd_nr;

   info->assignSlots = nv50_program_assign_varying_slots;

   prog->vp.bfc[0] = 0xff;
   prog->vp.bfc[1] = 0xff;
   prog->vp.edgeflag = 0xff;
   prog->vp.clpd[0] = map_undef;
   prog->vp.clpd[1] = map_undef;
   prog->vp.psiz = map_undef;
   prog->gp.primid = 0x80;

   info->driverPriv = prog;

#ifdef DEBUG
   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
#else
   info->optLevel = 3;
#endif

   ret = nv50_ir_generate_code(info);
   if (ret) {
      NOUVEAU_ERR("shader translation failed: %i\n", ret);
      goto out;
   }
   FREE(info->bin.syms);

   prog->code = info->bin.code;
   prog->code_size = info->bin.codeSize;
   prog->fixups = info->bin.relocData;
   prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
   prog->tls_space = info->bin.tlsSpace;

   if (prog->type == PIPE_SHADER_FRAGMENT) {
      if (info->prop.fp.writesDepth) {
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
         prog->fp.flags[1] = 0x11;
      }
      if (info->prop.fp.usesDiscard)
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
   }

   if (prog->pipe.stream_output.num_outputs)
      prog->so = nv50_program_create_strmout_state(info,
                                                   &prog->pipe.stream_output);

out:
   FREE(info);
   return !ret;
}