/**
 * Diagnostic handler installed on the LLVM context while compiling.
 * Forwards every diagnostic to the pipe debug callback; on an error
 * severity it also records failure in the shared diagnostics struct
 * so radeon_llvm_compile() can return non-zero.
 */
static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
{
   struct radeon_llvm_diagnostics *diag = (struct radeon_llvm_diagnostics *)context;
   LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
   char *description = LLVMGetDiagInfoDescription(di);
   const char *severity_str;

   if (severity == LLVMDSError)
      severity_str = "error";
   else if (severity == LLVMDSWarning)
      severity_str = "warning";
   else if (severity == LLVMDSRemark)
      severity_str = "remark";
   else if (severity == LLVMDSNote)
      severity_str = "note";
   else
      severity_str = "unknown";

   pipe_debug_message(diag->debug, SHADER_INFO,
                      "LLVM diagnostic (%s): %s", severity_str, description);

   if (severity == LLVMDSError) {
      diag->retval = 1;
      fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
   }

   /* description was allocated by LLVM; release via the LLVM-C API. */
   LLVMDisposeMessage(description);
}
/**
 * Report ir3 shader-variant statistics (instruction count, register
 * pressure, sync counts, ...) through the pipe debug callback.
 * Emits nothing unless the FD_DBG_SHADERDB debug flag is set.
 */
static void
dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug)
{
   if (unlikely(fd_mesa_debug & FD_DBG_SHADERDB)) {
      pipe_debug_message(debug, SHADER_INFO,
            "%s shader: %u inst, %u dwords, "
            "%u half, %u full, %u const, %u constlen, "
            "%u (ss), %u (sy), %d max_sun, %d loops\n",
            ir3_shader_stage(v->shader),
            v->info.instrs_count,
            v->info.sizedwords,
            v->info.max_half_reg + 1,
            v->info.max_reg + 1,
            v->info.max_const + 1,
            v->constlen,
            v->info.ss,
            v->info.sy,
            v->max_sun,
            v->loops);
   }
}
/**
 * Define a vgpu10 blend state object for the given
 * svga blend state.
 * Allocates a device id from the blend-object bitmask, translates the
 * per-render-target state into SVGA3dDXBlendStatePerRT entries, and issues
 * the DefineBlendState command (retrying once after a flush if the command
 * buffer is full).
 */
static void
define_blend_state_object(struct svga_context *svga,
                          struct svga_blend_state *bs)
{
   SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS];
   unsigned try;
   int i;

   assert(svga_have_vgpu10(svga));

   bs->id = util_bitmask_add(svga->blend_object_id_bm);

   /* NOTE(review): loop bound is SVGA3D_DX_MAX_RENDER_TARGETS while perRT is
    * sized SVGA3D_MAX_RENDER_TARGETS — assumed equal; confirm in the headers.
    */
   for (i = 0; i < SVGA3D_DX_MAX_RENDER_TARGETS; i++) {
      perRT[i].blendEnable = bs->rt[i].blend_enable;
      perRT[i].srcBlend = bs->rt[i].srcblend;
      perRT[i].destBlend = bs->rt[i].dstblend;
      perRT[i].blendOp = bs->rt[i].blendeq;
      perRT[i].srcBlendAlpha = bs->rt[i].srcblend_alpha;
      perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha;
      perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha;
      perRT[i].renderTargetWriteMask = bs->rt[i].writemask;
      /* logic ops are not programmed through this path */
      perRT[i].logicOpEnable = 0;
      perRT[i].logicOp = SVGA3D_LOGICOP_COPY;
      /* all RTs are expected to share target-0's blend terms (see
       * svga_create_blend_state below) */
      assert(perRT[i].srcBlend == perRT[0].srcBlend);
   }

   /* Loop in case command buffer is full and we need to flush and retry */
   for (try = 0; try < 2; try++) {
      enum pipe_error ret;

      ret = SVGA3D_vgpu10_DefineBlendState(svga->swc,
                                           bs->id,
                                           bs->alpha_to_coverage,
                                           bs->independent_blend_enable,
                                           perRT);
      if (ret == PIPE_OK)
         return;
      svga_context_flush(svga, NULL);
   }
}


/**
 * Create an svga blend state object from a gallium pipe_blend_state.
 * Logic ops are not supported by the device, so common logicop modes are
 * approximated with blend equations; otherwise target-0's blend terms are
 * replicated to every render target.  Returns NULL on allocation failure.
 */
static void *
svga_create_blend_state(struct pipe_context *pipe,
                        const struct pipe_blend_state *templ)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
   unsigned i;

   if (!blend)
      return NULL;

   /* Fill in the per-rendertarget blend state.  We currently only
    * support independent blend enable and colormask per render target.
    */
   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
      /* No way to set this in SVGA3D, and no way to correctly implement it on
       * top of D3D9 API.  Instead we try to simulate with various blend modes.
       */
      if (templ->logicop_enable) {
         switch (templ->logicop_func) {
         case PIPE_LOGICOP_XOR:
         case PIPE_LOGICOP_INVERT:
            /* dst = |src - dst|, rendered with white fragments */
            blend->need_white_fragments = TRUE;
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_SUBTRACT;
            break;
         case PIPE_LOGICOP_CLEAR:
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
            break;
         case PIPE_LOGICOP_COPY:
            blend->rt[i].blend_enable = FALSE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
            break;
         case PIPE_LOGICOP_COPY_INVERTED:
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
            break;
         case PIPE_LOGICOP_NOOP:
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
            break;
         case PIPE_LOGICOP_SET:
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
            break;
         case PIPE_LOGICOP_AND:
            /* Approximate with minimum - works for the 0 & anything case: */
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
            break;
         case PIPE_LOGICOP_AND_REVERSE:
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
            break;
         case PIPE_LOGICOP_AND_INVERTED:
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
            break;
         case PIPE_LOGICOP_OR:
            /* Approximate with maximum - works for the 1 | anything case: */
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
            break;
         case PIPE_LOGICOP_OR_REVERSE:
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
            break;
         case PIPE_LOGICOP_OR_INVERTED:
            blend->rt[i].blend_enable = TRUE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
            break;
         case PIPE_LOGICOP_NAND:
         case PIPE_LOGICOP_NOR:
         case PIPE_LOGICOP_EQUIV:
            /* Fill these in with plausible values */
            blend->rt[i].blend_enable = FALSE;
            blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
            break;
         default:
            assert(0);
            break;
         }
         /* alpha terms mirror the RGB terms for the logicop emulation */
         blend->rt[i].srcblend_alpha = blend->rt[i].srcblend;
         blend->rt[i].dstblend_alpha = blend->rt[i].dstblend;
         blend->rt[i].blendeq_alpha = blend->rt[i].blendeq;

         if (templ->logicop_func == PIPE_LOGICOP_XOR) {
            pipe_debug_message(&svga->debug.callback, CONFORMANCE,
                               "XOR logicop mode has limited support");
         }
         else if (templ->logicop_func != PIPE_LOGICOP_COPY) {
            pipe_debug_message(&svga->debug.callback, CONFORMANCE,
                               "general logicops are not supported");
         }
      }
      else {
         /* Note: the vgpu10 device does not yet support independent
          * blend terms per render target.  Target[0] always specifies the
          * blending terms.
          */
         if (templ->independent_blend_enable || templ->rt[0].blend_enable) {
            /* always use the 0th target's blending terms for now */
            blend->rt[i].srcblend =
               svga_translate_blend_factor(svga, templ->rt[0].rgb_src_factor);
            blend->rt[i].dstblend =
               svga_translate_blend_factor(svga, templ->rt[0].rgb_dst_factor);
            blend->rt[i].blendeq =
               svga_translate_blend_func(templ->rt[0].rgb_func);
            blend->rt[i].srcblend_alpha =
               svga_translate_blend_factor(svga, templ->rt[0].alpha_src_factor);
            blend->rt[i].dstblend_alpha =
               svga_translate_blend_factor(svga, templ->rt[0].alpha_dst_factor);
            blend->rt[i].blendeq_alpha =
               svga_translate_blend_func(templ->rt[0].alpha_func);

            /* flag separate alpha blending when alpha terms differ from RGB */
            if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend ||
                blend->rt[i].dstblend_alpha != blend->rt[i].dstblend ||
                blend->rt[i].blendeq_alpha != blend->rt[i].blendeq) {
               blend->rt[i].separate_alpha_blend_enable = TRUE;
            }
         }
         else {
            /* disabled - default blend terms */
            blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
            blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
            blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
            blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ONE;
            blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO;
            blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD;
         }

         if (templ->independent_blend_enable) {
            blend->rt[i].blend_enable = templ->rt[i].blend_enable;
         }
         else {
            blend->rt[i].blend_enable = templ->rt[0].blend_enable;
         }
      }

      /* Some GL blend modes are not supported by the VGPU9 device (there's
       * no equivalent of PIPE_BLENDFACTOR_[INV_]CONST_ALPHA).
       * When we set this flag, we copy the constant blend alpha value
       * to the R, G, B components.
       * This works as long as the src/dst RGB blend factors doesn't use
       * PIPE_BLENDFACTOR_CONST_COLOR and PIPE_BLENDFACTOR_CONST_ALPHA
       * at the same time.  There's no work-around for that.
       */
      if (!svga_have_vgpu10(svga)) {
         if (templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
             templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_CONST_ALPHA ||
             templ->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA ||
             templ->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_CONST_ALPHA) {
            blend->blend_color_alpha = TRUE;
         }
      }

      /* colormask is the one per-RT field honored independently */
      if (templ->independent_blend_enable) {
         blend->rt[i].writemask = templ->rt[i].colormask;
      }
      else {
         blend->rt[i].writemask = templ->rt[0].colormask;
      }
   }

   blend->independent_blend_enable = templ->independent_blend_enable;
   blend->alpha_to_coverage = templ->alpha_to_coverage;

   if (svga_have_vgpu10(svga)) {
      define_blend_state_object(svga, blend);
   }

   svga->hud.num_blend_objects++;
   SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws,
                        SVGA_STATS_COUNT_BLENDSTATE);

   return blend;
}


/** Bind a previously-created blend state object and mark state dirty. */
static void svga_bind_blend_state(struct pipe_context *pipe,
                                  void *blend)
{
   struct svga_context *svga = svga_context(pipe);

   svga->curr.blend = (struct svga_blend_state*)blend;
   svga->dirty |= SVGA_NEW_BLEND;
}


/**
 * Destroy a blend state object: releases the vgpu10 device object
 * (retrying once after a flush) and frees the host-side struct.
 */
static void svga_delete_blend_state(struct pipe_context *pipe,
                                    void *blend)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_blend_state *bs =
      (struct svga_blend_state *) blend;

   if (bs->id != SVGA3D_INVALID_ID) {
      enum pipe_error ret;

      ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
      if (ret != PIPE_OK) {
         /* command buffer was full; flush and retry once */
         svga_context_flush(svga, NULL);
         ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
         assert(ret == PIPE_OK);
      }

      /* invalidate the cached hw binding if it points at this object */
      if (bs->id == svga->state.hw_draw.blend_id)
         svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID;

      util_bitmask_clear(svga->blend_object_id_bm, bs->id);
      bs->id = SVGA3D_INVALID_ID;
   }

   FREE(blend);
   svga->hud.num_blend_objects--;
}


/** Record the current blend color; consumed when state is emitted. */
static void svga_set_blend_color( struct pipe_context *pipe,
                                  const struct pipe_blend_color *blend_color )
{
   struct svga_context *svga = svga_context(pipe);

   svga->curr.blend_color = *blend_color;

   svga->dirty |= SVGA_NEW_BLEND_COLOR;
}


/** Install the blend-state entry points into the pipe_context vtable. */
void svga_init_blend_functions( struct svga_context *svga )
{
   svga->pipe.create_blend_state = svga_create_blend_state;
   svga->pipe.bind_blend_state = svga_bind_blend_state;
   svga->pipe.delete_blend_state = svga_delete_blend_state;
   svga->pipe.set_blend_color = svga_set_blend_color;
}
static void define_rasterizer_object(struct svga_context *svga, struct svga_rasterizer_state *rast) { unsigned fill_mode = translate_fill_mode(rast->templ.fill_front); unsigned cull_mode = translate_cull_mode(rast->templ.cull_face); int depth_bias = rast->templ.offset_units; float slope_scaled_depth_bias = rast->templ.offset_scale; float depth_bias_clamp = 0.0; /* XXX fix me */ unsigned try; const float line_width = rast->templ.line_width > 0.0f ? rast->templ.line_width : 1.0f; const uint8 line_factor = rast->templ.line_stipple_enable ? rast->templ.line_stipple_factor : 0; const uint16 line_pattern = rast->templ.line_stipple_enable ? rast->templ.line_stipple_pattern : 0; rast->id = util_bitmask_add(svga->rast_object_id_bm); if (rast->templ.fill_front != rast->templ.fill_back) { /* The VGPU10 device can't handle different front/back fill modes. * We'll handle that with a swtnl/draw fallback. But we need to * make sure we always fill triangles in that case. */ fill_mode = SVGA3D_FILLMODE_FILL; } for (try = 0; try < 2; try++) { enum pipe_error ret = SVGA3D_vgpu10_DefineRasterizerState(svga->swc, rast->id, fill_mode, cull_mode, rast->templ.front_ccw, depth_bias, depth_bias_clamp, slope_scaled_depth_bias, rast->templ.depth_clip, rast->templ.scissor, rast->templ.multisample, rast->templ.line_smooth, line_width, rast->templ.line_stipple_enable, line_factor, line_pattern, !rast->templ.flatshade_first); if (ret == PIPE_OK) return; svga_context_flush(svga, NULL); } } static void * svga_create_rasterizer_state(struct pipe_context *pipe, const struct pipe_rasterizer_state *templ) { struct svga_context *svga = svga_context(pipe); struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state ); struct svga_screen *screen = svga_screen(pipe->screen); /* need this for draw module. 
*/ rast->templ = *templ; /* light_twoside - XXX: need fragment shader variant */ /* poly_smooth - XXX: no fallback available */ /* poly_stipple_enable - draw module */ /* sprite_coord_enable - ? */ /* point_quad_rasterization - ? */ /* point_size_per_vertex - ? */ /* sprite_coord_mode - ??? */ /* flatshade_first - handled by index translation */ /* half_pixel_center - XXX - viewport code */ /* line_width - draw module */ /* fill_cw, fill_ccw - draw module or index translation */ rast->shademode = svga_translate_flatshade( templ->flatshade ); rast->cullmode = svga_translate_cullmode( templ->cull_face, templ->front_ccw ); rast->scissortestenable = templ->scissor; rast->multisampleantialias = templ->multisample; rast->antialiasedlineenable = templ->line_smooth; rast->lastpixel = templ->line_last_pixel; rast->pointsprite = templ->sprite_coord_enable != 0x0; if (templ->point_smooth) { /* For smooth points we need to generate fragments for at least * a 2x2 region. Otherwise the quad we draw may be too small and * we may generate no fragments at all. 
*/ rast->pointsize = MAX2(2.0f, templ->point_size); } else { rast->pointsize = templ->point_size; } rast->hw_fillmode = PIPE_POLYGON_MODE_FILL; /* Use swtnl + decomposition implement these: */ if (templ->line_width <= screen->maxLineWidth) { /* pass line width to device */ rast->linewidth = MAX2(1.0F, templ->line_width); } else if (svga->debug.no_line_width) { /* nothing */ } else { /* use 'draw' pipeline for wide line */ rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; rast->need_pipeline_lines_str = "line width"; } if (templ->line_stipple_enable) { if (screen->haveLineStipple || svga->debug.force_hw_line_stipple) { SVGA3dLinePattern lp; lp.repeat = templ->line_stipple_factor + 1; lp.pattern = templ->line_stipple_pattern; rast->linepattern = lp.uintValue; } else { /* use 'draw' module to decompose into short line segments */ rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; rast->need_pipeline_lines_str = "line stipple"; } } if (!svga_have_vgpu10(svga) && templ->point_smooth) { rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; rast->need_pipeline_points_str = "smooth points"; } if (templ->line_smooth && !screen->haveLineSmooth) { /* * XXX: Enabling the pipeline slows down performance immensely, so ignore * line smooth state, where there is very little visual improvement. * Smooth lines will still be drawn for wide lines. 
*/ #if 0 rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; rast->need_pipeline_lines_str = "smooth lines"; #endif } { int fill_front = templ->fill_front; int fill_back = templ->fill_back; int fill = PIPE_POLYGON_MODE_FILL; boolean offset_front = util_get_offset(templ, fill_front); boolean offset_back = util_get_offset(templ, fill_back); boolean offset = FALSE; switch (templ->cull_face) { case PIPE_FACE_FRONT_AND_BACK: offset = FALSE; fill = PIPE_POLYGON_MODE_FILL; break; case PIPE_FACE_FRONT: offset = offset_front; fill = fill_front; break; case PIPE_FACE_BACK: offset = offset_back; fill = fill_back; break; case PIPE_FACE_NONE: if (fill_front != fill_back || offset_front != offset_back) { /* Always need the draw module to work out different * front/back fill modes: */ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; rast->need_pipeline_tris_str = "different front/back fillmodes"; } else { offset = offset_front; fill = fill_front; } break; default: assert(0); break; } /* Unfilled primitive modes aren't implemented on all virtual * hardware. We can do some unfilled processing with index * translation, but otherwise need the draw module: */ if (fill != PIPE_POLYGON_MODE_FILL && (templ->flatshade || templ->light_twoside || offset || templ->cull_face != PIPE_FACE_NONE)) { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; rast->need_pipeline_tris_str = "unfilled primitives with no index manipulation"; } /* If we are decomposing to lines, and lines need the pipeline, * then we also need the pipeline for tris. 
*/ if (fill == PIPE_POLYGON_MODE_LINE && (rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)) { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; rast->need_pipeline_tris_str = "decomposing lines"; } /* Similarly for points: */ if (fill == PIPE_POLYGON_MODE_POINT && (rast->need_pipeline & SVGA_PIPELINE_FLAG_POINTS)) { fill = PIPE_POLYGON_MODE_FILL; rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; rast->need_pipeline_tris_str = "decomposing points"; } if (offset) { rast->slopescaledepthbias = templ->offset_scale; rast->depthbias = templ->offset_units; } rast->hw_fillmode = fill; } if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) { /* Turn off stuff which will get done in the draw module: */ rast->hw_fillmode = PIPE_POLYGON_MODE_FILL; rast->slopescaledepthbias = 0; rast->depthbias = 0; } if (0 && rast->need_pipeline) { debug_printf("svga: rast need_pipeline = 0x%x\n", rast->need_pipeline); debug_printf(" pnts: %s \n", rast->need_pipeline_points_str); debug_printf(" lins: %s \n", rast->need_pipeline_lines_str); debug_printf(" tris: %s \n", rast->need_pipeline_tris_str); } if (svga_have_vgpu10(svga)) { define_rasterizer_object(svga, rast); } if (templ->poly_smooth) { pipe_debug_message(&svga->debug.callback, CONFORMANCE, "GL_POLYGON_SMOOTH not supported"); } svga->hud.num_state_objects++; return rast; } static void svga_bind_rasterizer_state( struct pipe_context *pipe, void *state ) { struct svga_context *svga = svga_context(pipe); struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state; if (!raster || !svga->curr.rast || raster->templ.poly_stipple_enable != svga->curr.rast->templ.poly_stipple_enable) { svga->dirty |= SVGA_NEW_STIPPLE; } svga->curr.rast = raster; svga->dirty |= SVGA_NEW_RAST; } static void svga_delete_rasterizer_state(struct pipe_context *pipe, void *state) { struct svga_context *svga = svga_context(pipe); struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *) state; if 
(svga_have_vgpu10(svga)) { enum pipe_error ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id); if (ret != PIPE_OK) { svga_context_flush(svga, NULL); ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id); } if (raster->id == svga->state.hw_draw.rasterizer_id) svga->state.hw_draw.rasterizer_id = SVGA3D_INVALID_ID; util_bitmask_clear(svga->rast_object_id_bm, raster->id); } FREE(state); svga->hud.num_state_objects--; } void svga_init_rasterizer_functions( struct svga_context *svga ) { svga->pipe.create_rasterizer_state = svga_create_rasterizer_state; svga->pipe.bind_rasterizer_state = svga_bind_rasterizer_state; svga->pipe.delete_rasterizer_state = svga_delete_rasterizer_state; }
/**
 * Translate a shader program (TGSI or NIR source) into nv50 hardware code
 * for the given chipset, filling in the nv50_program output fields
 * (code, relocs, register usage, per-stage properties, stream output).
 *
 * Returns true on success, false on allocation or translation failure.
 */
bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
                       struct pipe_debug_callback *debug)
{
   struct nv50_ir_prog_info *info;
   int i, ret;
   /* "undefined" slot marker differs between VP and other stages */
   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return false;

   info->type = prog->type;
   info->target = chipset;

   info->bin.sourceRep = prog->pipe.type;
   switch (prog->pipe.type) {
   case PIPE_SHADER_IR_TGSI:
      info->bin.source = (void *)prog->pipe.tokens;
      break;
   case PIPE_SHADER_IR_NIR:
      /* clone: code generation may mutate the NIR */
      info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir);
      break;
   default:
      assert(!"unsupported IR!");
      /* Fix: 'info' was leaked on this early-return path. */
      FREE(info);
      return false;
   }

   info->bin.smemSize = prog->cp.smem_size;

   info->io.auxCBSlot = 15;
   info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
   info->io.genUserClip = prog->vp.clpd_nr;
   if (prog->fp.alphatest)
      info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET;

   info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
   info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
   info->io.msInfoCBSlot = 15;
   info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;

   info->assignSlots = nv50_program_assign_varying_slots;

   /* initialize varying maps to "unused" before codegen fills them in */
   prog->vp.bfc[0] = 0xff;
   prog->vp.bfc[1] = 0xff;
   prog->vp.edgeflag = 0xff;
   prog->vp.clpd[0] = map_undef;
   prog->vp.clpd[1] = map_undef;
   prog->vp.psiz = map_undef;
   prog->gp.has_layer = 0;
   prog->gp.has_viewport = 0;

   if (prog->type == PIPE_SHADER_COMPUTE)
      info->prop.cp.inputOffset = 0x10;

   info->driverPriv = prog;

#ifdef DEBUG
   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
   info->omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0);
#else
   info->optLevel = 3;
#endif

   ret = nv50_ir_generate_code(info);
   if (ret) {
      NOUVEAU_ERR("shader translation failed: %i\n", ret);
      goto out;
   }

   prog->code = info->bin.code;
   prog->code_size = info->bin.codeSize;
   prog->fixups = info->bin.relocData;
   prog->interps = info->bin.fixupData;
   prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
   prog->tls_space = info->bin.tlsSpace;
   prog->cp.smem_size = info->bin.smemSize;
   prog->mul_zero_wins = info->io.mul_zero_wins;
   prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;

   prog->vp.clip_enable = (1 << info->io.clipDistances) - 1;
   prog->vp.cull_enable =
      ((1 << info->io.cullDistances) - 1) << info->io.clipDistances;
   prog->vp.clip_mode = 0;
   /* mark each cull distance's 4-bit mode nibble */
   for (i = 0; i < info->io.cullDistances; ++i)
      prog->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);

   if (prog->type == PIPE_SHADER_FRAGMENT) {
      if (info->prop.fp.writesDepth) {
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
         prog->fp.flags[1] = 0x11;
      }
      if (info->prop.fp.usesDiscard)
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
   } else
   if (prog->type == PIPE_SHADER_GEOMETRY) {
      switch (info->prop.gp.outputPrim) {
      case PIPE_PRIM_LINE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
         break;
      case PIPE_PRIM_POINTS:
      default:
         assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS);
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
         break;
      }
      prog->gp.vert_count = CLAMP(info->prop.gp.maxVertices, 1, 1024);
   }

   if (prog->type == PIPE_SHADER_COMPUTE) {
      prog->cp.syms = info->bin.syms;
      prog->cp.num_syms = info->bin.numSyms;
   } else {
      FREE(info->bin.syms);
   }

   if (prog->pipe.stream_output.num_outputs)
      prog->so = nv50_program_create_strmout_state(info,
                                                   &prog->pipe.stream_output);

   pipe_debug_message(debug, SHADER_INFO,
                      "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d",
                      prog->type, info->bin.tlsSpace, info->bin.smemSize,
                      prog->max_gpr, info->bin.instructions,
                      info->bin.codeSize);

out:
   /* the cloned NIR (if any) is owned by us; the TGSI tokens are not */
   if (info->bin.sourceRep == PIPE_SHADER_IR_NIR)
      ralloc_free((void *)info->bin.source);
   FREE(info);
   return !ret;
}
/**
 * Draw a non-indexed primitive range, converting it as needed for the
 * svga device: unsupported primitive types are either remapped directly
 * (flat-shaded polygons/quads -> triangle fan) or drawn indirectly via a
 * generated index buffer; unfilled primitives go through the unfilled
 * fallback generator.
 */
enum pipe_error
svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
                       enum pipe_prim_type prim, unsigned start, unsigned count,
                       unsigned start_instance, unsigned instance_count)
{
   enum pipe_prim_type gen_prim;
   unsigned gen_size, gen_nr;
   enum indices_mode gen_type;
   u_generate_func gen_func;
   enum pipe_error ret = PIPE_OK;
   unsigned api_pv = hwtnl->api_pv;
   struct svga_context *svga = hwtnl->svga;

   if (svga->curr.rast->templ.fill_front !=
       svga->curr.rast->templ.fill_back) {
      /* different front/back fill modes are handled by a swtnl fallback */
      assert(hwtnl->api_fillmode == PIPE_POLYGON_MODE_FILL);
   }

   if (svga->curr.rast->templ.flatshade &&
       svga->state.hw_draw.fs->constant_color_output) {
      /* The fragment color is a constant, not per-vertex so the whole
       * primitive will be the same color (except for possible blending).
       * We can ignore the current provoking vertex state and use whatever
       * the hardware wants.
       */
      api_pv = hwtnl->hw_pv;

      if (hwtnl->api_fillmode == PIPE_POLYGON_MODE_FILL) {
         /* Do some simple primitive conversions to avoid index buffer
          * generation below.  Note that polygons and quads are not directly
          * supported by the svga device.  Also note, we can only do this
          * for flat/constant-colored rendering because of provoking vertex.
          */
         if (prim == PIPE_PRIM_POLYGON) {
            prim = PIPE_PRIM_TRIANGLE_FAN;
         }
         else if (prim == PIPE_PRIM_QUADS && count == 4) {
            prim = PIPE_PRIM_TRIANGLE_FAN;
         }
      }
   }

   if (svga_need_unfilled_fallback(hwtnl, prim)) {
      /* Convert unfilled polygons into points, lines, triangles */
      gen_type = u_unfilled_generator(prim,
                                      start,
                                      count,
                                      hwtnl->api_fillmode,
                                      &gen_prim, &gen_size, &gen_nr, &gen_func);
   }
   else {
      /* Convert PIPE_PRIM_LINE_LOOP to PIPE_PRIM_LINESTRIP,
       * convert PIPE_PRIM_POLYGON to PIPE_PRIM_TRIANGLE_FAN,
       * etc, if needed (as determined by svga_hw_prims mask).
       */
      gen_type = u_index_generator(svga_hw_prims,
                                   prim,
                                   start,
                                   count,
                                   api_pv,
                                   hwtnl->hw_pv,
                                   &gen_prim, &gen_size, &gen_nr, &gen_func);
   }

   if (gen_type == U_GENERATE_LINEAR) {
      /* no index buffer needed; draw directly */
      return simple_draw_arrays(hwtnl, gen_prim, start, count,
                                start_instance, instance_count);
   }
   else {
      struct pipe_resource *gen_buf = NULL;

      /* Need to draw as indexed primitive.
       * Potentially need to run the gen func to build an index buffer.
       */
      ret = retrieve_or_generate_indices(hwtnl,
                                         prim,
                                         gen_type,
                                         gen_nr,
                                         gen_size,
                                         gen_func,
                                         &gen_buf);
      if (ret != PIPE_OK)
         goto done;

      pipe_debug_message(&svga->debug.callback, PERF_INFO,
                         "generating temporary index buffer for drawing %s",
                         u_prim_name(prim));

      ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
                                                  gen_buf,
                                                  gen_size,
                                                  start,
                                                  0,
                                                  count - 1,
                                                  gen_prim, 0, gen_nr,
                                                  start_instance,
                                                  instance_count);
      /* NOTE(review): this goto is redundant (execution falls through to
       * 'done' regardless), kept as-is. */
      if (ret != PIPE_OK)
         goto done;

   done:
      /* release the temporary index buffer reference, if any */
      if (gen_buf)
         pipe_resource_reference(&gen_buf, NULL);

      return ret;
   }
}
/**
 * Compile an LLVM module to machine code.
 *
 * A diagnostic handler is installed on the module's context for the
 * duration of the compile so warnings/errors are routed to the pipe
 * debug callback.  If no target machine is supplied, a default r600
 * one is created (and disposed) for @gpu_family.
 *
 * @returns 0 for success, 1 for failure
 */
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
                             const char *gpu_family, LLVMTargetMachineRef tm,
                             struct pipe_debug_callback *debug)
{
   struct radeon_llvm_diagnostics diag;
   char cpu[CPU_STRING_LEN];
   char fs[FS_STRING_LEN];
   char *err;
   bool dispose_tm = false;
   LLVMContextRef llvm_ctx;
   LLVMMemoryBufferRef out_buffer;
   unsigned buffer_size;
   const char *buffer_data;
   char triple[TRIPLE_STRING_LEN];
   LLVMBool mem_err;

   diag.debug = debug;
   diag.retval = 0;

   if (!tm) {
      /* Fix: use snprintf instead of strncpy — strncpy does not guarantee
       * NUL-termination when the source string fills the buffer
       * (e.g. a long gpu_family), which would make LLVM read past the
       * end of 'cpu'.  snprintf always terminates.
       */
      snprintf(triple, sizeof(triple), "r600--");
      LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
      if (!target) {
         return 1;
      }
      snprintf(cpu, sizeof(cpu), "%s", gpu_family);
      snprintf(fs, sizeof(fs), "+DumpCode");
      tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
                        LLVMCodeGenLevelDefault, LLVMRelocDefault,
                                             LLVMCodeModelDefault);
      dispose_tm = true;
   }

   /* Setup Diagnostic Handler*/
   llvm_ctx = LLVMGetModuleContext(M);

   LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &diag);

   /* Compile IR*/
   mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
                                                 &out_buffer);

   /* Process Errors/Warnings */
   if (mem_err) {
      fprintf(stderr, "%s: %s", __FUNCTION__, err);
      pipe_debug_message(debug, SHADER_INFO,
                         "LLVM emit error: %s", err);
      /* Fix: 'err' is allocated by LLVM, so release it with
       * LLVMDisposeMessage (as done for diagnostic descriptions above)
       * rather than gallium's FREE, which pairs with gallium MALLOC. */
      LLVMDisposeMessage(err);
      diag.retval = 1;
      goto out;
   }

   /* Extract Shader Code*/
   buffer_size = LLVMGetBufferSize(out_buffer);
   buffer_data = LLVMGetBufferStart(out_buffer);

   radeon_elf_read(buffer_data, buffer_size, binary);

   /* Clean up */
   LLVMDisposeMemoryBuffer(out_buffer);

out:
   if (dispose_tm) {
      LLVMDisposeTargetMachine(tm);
   }
   if (diag.retval != 0)
      pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
   return diag.retval;
}