void r300_emit_scissor_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; CS_LOCALS(r300); BEGIN_CS(size); OUT_CS_REG_SEQ(R300_SC_CLIPRECT_TL_0, 2); if (r300->screen->caps.is_r500) { OUT_CS((scissor->minx << R300_CLIPRECT_X_SHIFT) | (scissor->miny << R300_CLIPRECT_Y_SHIFT)); OUT_CS(((scissor->maxx - 1) << R300_CLIPRECT_X_SHIFT) | ((scissor->maxy - 1) << R300_CLIPRECT_Y_SHIFT)); } else { OUT_CS(((scissor->minx + 1440) << R300_CLIPRECT_X_SHIFT) | ((scissor->miny + 1440) << R300_CLIPRECT_Y_SHIFT)); OUT_CS(((scissor->maxx + 1440-1) << R300_CLIPRECT_X_SHIFT) | ((scissor->maxy + 1440-1) << R300_CLIPRECT_Y_SHIFT)); } END_CS; }
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) { struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; uint32_t height = fb->height; uint32_t width = fb->width; CS_LOCALS(r300); if (r300->cbzb_clear) { struct r300_surface *surf = r300_surface(fb->cbufs[0]); height = surf->cbzb_height; width = surf->cbzb_width; } DBG(r300, DBG_SCISSOR, "r300: Scissor width: %i, height: %i, CBZB clear: %s\n", width, height, r300->cbzb_clear ? "YES" : "NO"); BEGIN_CS(size); /* Set up scissors. * By writing to the SC registers, SC & US assert idle. */ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); if (r300->screen->caps.is_r500) { OUT_CS(0); OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) | ((height - 1) << R300_SCISSORS_Y_SHIFT)); } else { OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | (1440 << R300_SCISSORS_Y_SHIFT)); OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) | ((height + 1440-1) << R300_SCISSORS_Y_SHIFT)); } /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ OUT_CS_TABLE(gpuflush->cb_flush_clean, 6); END_CS; }
void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.size); /* Set the pointer to our vertex buffer. The emitted values are this: * PACKET3 [3D_LOAD_VBPNTR] * COUNT [1] * FORMAT [size | stride << 8] * OFFSET [offset into BO] * VBPNTR [relocated BO] */ BEGIN_CS(7); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); OUT_CS(0); assert(r300->vbo_cs); OUT_CS(0xc0001000); /* PKT3_NOP */ OUT_CS(r300->rws->cs_get_reloc(r300->cs, r300->vbo_cs) * 4); END_CS; }
static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) { CS_LOCALS(r300); BEGIN_CS(4); OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300_translate_primitive(mode)); END_CS; }
void r300_emit_rs_block_state(struct r300_context* r300, unsigned size, void* state) { struct r300_rs_block* rs = (struct r300_rs_block*)state; unsigned i; /* It's the same for both INST and IP tables */ unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1; CS_LOCALS(r300); if (DBG_ON(r300, DBG_RS_BLOCK)) { r500_dump_rs_block(rs); fprintf(stderr, "r300: RS emit:\n"); for (i = 0; i < count; i++) fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]); for (i = 0; i < count; i++) fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]); fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n", rs->count, rs->inst_count); } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); OUT_CS(rs->vap_vtx_state_cntl); OUT_CS(rs->vap_vsm_vtx_assm); OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); OUT_CS(rs->vap_out_vtx_fmt[0]); OUT_CS(rs->vap_out_vtx_fmt[1]); OUT_CS_REG_SEQ(R300_GB_ENABLE, 1); OUT_CS(rs->gb_enable); if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); } else { OUT_CS_REG_SEQ(R300_RS_IP_0, count); } OUT_CS_TABLE(rs->ip, count); OUT_CS_REG_SEQ(R300_RS_COUNT, 2); OUT_CS(rs->count); OUT_CS(rs->inst_count); if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_INST_0, count); } else { OUT_CS_REG_SEQ(R300_RS_INST_0, count); } OUT_CS_TABLE(rs->inst, count); END_CS; }
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_resource *tex; CS_LOCALS(r300); tex = r300_resource(fb->zsbuf->texture); BEGIN_CS(size); OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); OUT_CS(0); OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]); OUT_CS(0); END_CS; /* Mark the current zbuffer's zmask as in use. */ r300->zmask_in_use = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); }
static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags, struct pipe_fence_handle **fence) { struct r300_atom *atom; r300_emit_hyperz_end(r300); r300_emit_query_end(r300); if (r300->screen->caps.is_r500) r500_emit_index_bias(r300, 0); /* The DDX doesn't set these regs. */ if (r300->screen->info.drm_minor >= 6) { CS_LOCALS(r300); OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); OUT_CS(0x66666666); OUT_CS(0x6666666); } r300->flush_counter++; r300->rws->cs_flush(r300->cs, flags, fence, 0); r300->dirty_hw = 0; /* New kitchen sink, baby. */ foreach_atom(r300, atom) { if (atom->state || atom->allow_null_state) { r300_mark_atom_dirty(r300, atom); } } r300->vertex_arrays_dirty = TRUE; /* Unmark HWTCL state for SWTCL. */ if (!r300->screen->caps.has_tcl) { r300->vs_state.dirty = FALSE; r300->vs_constants.dirty = FALSE; r300->clip_state.dirty = FALSE; } }
static void r300_render_draw(struct vbuf_render* render, const ushort* indices, uint count) { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; int i; CS_LOCALS(r300); r300_emit_dirty_state(r300); BEGIN_CS(2 + (count+1)/2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); for (i = 0; i < count-1; i += 2) { OUT_CS(indices[i+1] << 16 | indices[i]); } if (count % 2) { OUT_CS(indices[count-1]); } END_CS; }
static void r300_render_draw_arrays(struct vbuf_render* render, unsigned start, unsigned count) { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; CS_LOCALS(r300); r300_emit_dirty_state(r300); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); BEGIN_CS(2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300render->hwprim); END_CS; }
static void r300_render_draw_arrays(struct vbuf_render* render, unsigned start, unsigned count) { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; uint8_t* ptr; unsigned i; unsigned dwords = 6; CS_LOCALS(r300); (void) i; (void) ptr; DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count); if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL, NULL, dwords, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL, NULL, 0, 0, -1)) return; } BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, r300render->prim)); OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300render->hwprim); END_CS; r300->draw_first_emitted = TRUE; }
void r300_emit_fb_state_pipelined(struct r300_context *r300, unsigned size, void *state) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; unsigned i, num_cbufs = fb->nr_cbufs; unsigned mspos0, mspos1; CS_LOCALS(r300); /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be * marked as UNUSED in the US block. */ if (r300->fb_multiwrite) { num_cbufs = MIN2(num_cbufs, 1); } BEGIN_CS(size); /* Colorbuffer format in the US block. * (must be written after unpipelined regs) */ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); for (i = 0; i < num_cbufs; i++) { OUT_CS(r300_surface(fb->cbufs[i])->format); } for (; i < 1; i++) { OUT_CS(R300_US_OUT_FMT_C4_8 | R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A); } for (; i < 4; i++) { OUT_CS(R300_US_OUT_FMT_UNUSED); } /* Multisampling. Depends on framebuffer sample count. * These are pipelined regs and as such cannot be moved * to the AA state. */ mspos0 = 0x66666666; mspos1 = 0x6666666; if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { /* Subsample placement. These may not be optimal. */ switch (fb->cbufs[0]->texture->nr_samples) { case 2: mspos0 = 0x33996633; mspos1 = 0x6666663; break; case 3: mspos0 = 0x33936933; mspos1 = 0x6666663; break; case 4: mspos0 = 0x33939933; mspos1 = 0x3966663; break; case 6: mspos0 = 0x22a2aa22; mspos1 = 0x2a65672; break; default: debug_printf("r300: Bad number of multisamples!\n"); } } OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); OUT_CS(mspos0); OUT_CS(mspos1); END_CS; }
static void r300_emit_draw_elements(struct r300_context *r300, struct pipe_resource* indexBuffer, unsigned indexSize, unsigned max_index, unsigned mode, unsigned start, unsigned count, uint16_t *imm_indices3) { uint32_t count_dwords, offset_dwords; boolean alt_num_verts = count > 65535; CS_LOCALS(r300); if (count >= (1 << 24)) { fprintf(stderr, "r300: Got a huge number of vertices: %i, " "refusing to render (max_index: %i).\n", count, max_index); return; } DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, max %u\n", count, max_index); r300_emit_draw_init(r300, mode, max_index); /* If start is odd, render the first triangle with indices embedded * in the command stream. This will increase start by 3 and make it * even. We can then proceed without a fallback. */ if (indexSize == 2 && (start & 1) && mode == PIPE_PRIM_TRIANGLES) { BEGIN_CS(4); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) | R300_VAP_VF_CNTL__PRIM_TRIANGLES); OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]); OUT_CS(imm_indices3[2]); END_CS; start += 3; count -= 3; if (!count) return; } offset_dwords = indexSize * start / sizeof(uint32_t); BEGIN_CS(8 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { count_dwords = count; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300_translate_primitive(mode) | (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } else { count_dwords = (count + 1) / 2; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode) | (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); OUT_CS(count_dwords); OUT_CS_RELOC(r300_resource(indexBuffer)); END_CS; }
static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, struct pipe_fence_handle **fence, uint32_t cs_trace_id) { struct amdgpu_cs *cs = amdgpu_cs(rcs); struct amdgpu_winsys *ws = cs->ctx->ws; switch (cs->base.ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ while (rcs->cdw & 7) OUT_CS(&cs->base, 0x00000000); /* NOP packet */ break; case RING_GFX: /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */ while (rcs->cdw & 7) OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ break; case RING_UVD: while (rcs->cdw & 15) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ break; default: break; } if (rcs->cdw > rcs->max_dw) { fprintf(stderr, "amdgpu: command stream overflowed\n"); } amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer, RADEON_USAGE_READ, 0, RADEON_PRIO_IB1); /* If the CS is not empty or overflowed.... */ if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) { int r; r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles, cs->flags, &cs->request.resources); if (r) { fprintf(stderr, "amdgpu: resource list creation failed (%d)\n", r); cs->request.resources = NULL; goto cleanup; } cs->ib.size = cs->base.cdw; cs->used_ib_space += cs->base.cdw * 4; amdgpu_cs_do_submission(cs, fence); /* Cleanup. */ if (cs->request.resources) amdgpu_bo_list_destroy(cs->request.resources); } cleanup: amdgpu_cs_context_cleanup(cs); amdgpu_get_new_ib(cs); ws->num_cs_flushes++; }
/* This functions is used to draw a rectangle for the blitter module. * * If we rendered a quad, the pixels on the main diagonal * would be computed and stored twice, which makes the clear/copy codepaths * somewhat inefficient. Instead we use a rectangular point sprite. */ void r300_blitter_draw_rectangle(struct blitter_context *blitter, int x1, int y1, int x2, int y2, float depth, enum blitter_attrib_type type, const union pipe_color_union *attrib) { struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter)); unsigned last_sprite_coord_enable = r300->sprite_coord_enable; unsigned width = x2 - x1; unsigned height = y2 - y1; unsigned vertex_size = type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4; unsigned dwords = 13 + vertex_size + (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0); static const union pipe_color_union zeros; CS_LOCALS(r300); /* XXX workaround for a lockup in MSAA resolve on SWTCL chipsets, this * function most probably doesn't handle type=NONE correctly */ if (!r300->screen->caps.has_tcl && type == UTIL_BLITTER_ATTRIB_NONE) { util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib); return; } if (r300->skip_rendering) return; if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) r300->sprite_coord_enable = 1; r300_update_derived_state(r300); /* Mark some states we don't care about as non-dirty. */ r300->viewport_state.dirty = FALSE; if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) goto done; DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); BEGIN_CS(dwords); /* Set up GA. */ OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16)); if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { /* Set up the GA to generate texcoords. */ OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT)); OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); OUT_CS_32F(attrib->f[0]); OUT_CS_32F(attrib->f[3]); OUT_CS_32F(attrib->f[2]); OUT_CS_32F(attrib->f[1]); } /* Set up VAP controls. */ OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); OUT_CS(1); OUT_CS(0); /* Draw. */ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) | R300_VAP_VF_CNTL__PRIM_POINTS); OUT_CS_32F(x1 + width * 0.5f); OUT_CS_32F(y1 + height * 0.5f); OUT_CS_32F(depth); OUT_CS_32F(1); if (vertex_size == 8) { if (!attrib) attrib = &zeros; OUT_CS_TABLE(attrib->f, 4); } END_CS; done: /* Restore the state. */ r300_mark_atom_dirty(r300, &r300->rs_state); r300_mark_atom_dirty(r300, &r300->viewport_state); r300->sprite_coord_enable = last_sprite_coord_enable; }
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, struct pipe_fence_handle **fence, uint32_t cs_trace_id) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; switch (cs->base.ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ if (cs->ws->info.chip_class <= SI) { while (rcs->cdw & 7) OUT_CS(&cs->base, 0xf0000000); /* NOP packet */ } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x00000000); /* NOP packet */ } break; case RING_GFX: /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements * r6xx, requires at least 4 dw alignment to avoid a hw bug. * hawaii with old firmware needs type2 nop packet. * accel_working2 with value 3 indicates the new firmware. */ if (cs->ws->info.chip_class <= SI || (cs->ws->info.family == CHIP_HAWAII && cs->ws->accel_working2 < 3)) { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ } break; case RING_UVD: while (rcs->cdw & 15) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ break; default: break; } if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) { fprintf(stderr, "radeon: command stream overflowed\n"); } if (fence) { radeon_fence_reference(fence, NULL); *fence = radeon_cs_create_fence(rcs); } radeon_drm_cs_sync_flush(rcs); /* Swap command streams. */ tmp = cs->csc; cs->csc = cs->cst; cs->cst = tmp; cs->cst->cs_trace_id = cs_trace_id; /* If the CS is not empty or overflowed, emit it in a separate thread. */ if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) { unsigned i, crelocs; crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.cdw; for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } switch (cs->base.ring_type) { case RING_DMA: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; } break; case RING_UVD: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_UVD; cs->cst->cs.num_chunks = 3; break; case RING_VCE: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_VCE; cs->cst->cs.num_chunks = 3; break; default: case RING_GFX: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_GFX; cs->cst->cs.num_chunks = 2; if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; cs->cst->cs.num_chunks = 3; } if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_END_OF_FRAME) { cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_COMPUTE) { cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; cs->cst->cs.num_chunks = 3; } break; } if (cs->ws->thread) { pipe_semaphore_wait(&cs->flush_completed); radeon_drm_ws_queue_cs(cs->ws, cs); if (!(flags & RADEON_FLUSH_ASYNC)) radeon_drm_cs_sync_flush(rcs); } else { radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst); } } else { radeon_cs_context_cleanup(cs->cst); } /* Prepare a new CS. */ cs->base.buf = cs->csc->buf; cs->base.cdw = 0; cs->ws->num_cs_flushes++; }
void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed, int instance_id) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; struct r300_resource *buf; int i; unsigned vertex_array_count = r300->velems->count; unsigned packet_size = (vertex_array_count * 3 + 1) / 2; struct pipe_vertex_buffer *vb1, *vb2; unsigned *hw_format_size = r300->velems->format_size; unsigned size1, size2, offset1, offset2, stride1, stride2; CS_LOCALS(r300); BEGIN_CS(2 + packet_size + vertex_array_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); if (instance_id == -1) { /* Non-instanced arrays. This ignores instance_divisor and instance_id. */ for (i = 0; i < vertex_array_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; vb2 = &vbuf[velem[i+1].vertex_buffer_index]; size1 = hw_format_size[i]; size2 = hw_format_size[i+1]; OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); } if (vertex_array_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; size1 = hw_format_size[i]; OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); } for (i = 0; i < vertex_array_count; i++) { buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer); OUT_CS_RELOC(buf); } } else { /* Instanced arrays. */ for (i = 0; i < vertex_array_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; vb2 = &vbuf[velem[i+1].vertex_buffer_index]; size1 = hw_format_size[i]; size2 = hw_format_size[i+1]; if (velem[i].instance_divisor) { stride1 = 0; offset1 = vb1->buffer_offset + velem[i].src_offset + (instance_id / velem[i].instance_divisor) * vb1->stride; } else { stride1 = vb1->stride; offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; } if (velem[i+1].instance_divisor) { stride2 = 0; offset2 = vb2->buffer_offset + velem[i+1].src_offset + (instance_id / velem[i+1].instance_divisor) * vb2->stride; } else { stride2 = vb2->stride; offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride; } OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) | R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2)); OUT_CS(offset1); OUT_CS(offset2); } if (vertex_array_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; size1 = hw_format_size[i]; if (velem[i].instance_divisor) { stride1 = 0; offset1 = vb1->buffer_offset + velem[i].src_offset + (instance_id / velem[i].instance_divisor) * vb1->stride; } else { stride1 = vb1->stride; offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; } OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); OUT_CS(offset1); } for (i = 0; i < vertex_array_count; i++) { buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer); OUT_CS_RELOC(buf); } } END_CS; }
void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; uint32_t rb3d_cctl = 0; CS_LOCALS(r300); BEGIN_CS(size); if (r300->screen->caps.is_r500) { rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE; } /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. */ if (fb->nr_cbufs && r300->fb_multiwrite) { rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs); } if (r300->cmask_in_use) { rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE | R300_RB3D_CCTL_CMASK_ENABLE; } OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl); /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { surf = r300_surface(r300_get_nonnull_cb(fb, i)); OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset); OUT_CS_RELOC(surf); OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch); OUT_CS_RELOC(surf); if (r300->cmask_in_use && i == 0) { OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0); OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask); OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value); if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) { OUT_CS_REG_SEQ(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2); OUT_CS(r300->color_clear_value_ar); OUT_CS(r300->color_clear_value_gb); } } } /* Set up the ZB part of the CBZB clear. */ if (r300->cbzb_clear) { surf = r300_surface(fb->cbufs[0]); OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset); OUT_CS_RELOC(surf); OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch); OUT_CS_RELOC(surf); DBG(r300, DBG_CBZB, "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, surf->cbzb_pitch); } /* Set up a zbuffer. */ else if (fb->zsbuf) { surf = r300_surface(fb->zsbuf); OUT_CS_REG(R300_ZB_FORMAT, surf->format); OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset); OUT_CS_RELOC(surf); OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch); OUT_CS_RELOC(surf); if (r300->hyperz_enabled) { /* HiZ RAM. */ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); /* Z Mask RAM. (compressed zbuffer) */ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); } } END_CS; }
void r300_emit_fb_state_pipelined(struct r300_context *r300, unsigned size, void *state) { /* The sample coordinates are in the range [0,11], because * GB_TILE_CONFIG.SUBPIXEL is set to the 1/12 subpixel precision. * * Some sample coordinates reach to neighboring pixels and should not be used. * (e.g. Y=11) * * The unused samples must be set to the positions of other valid samples. */ static unsigned sample_locs_1x[12] = { 6,6, 6,6, 6,6, 6,6, 6,6, 6,6 }; static unsigned sample_locs_2x[12] = { 3,9, 9,3, 9,3, 9,3, 9,3, 9,3 }; static unsigned sample_locs_4x[12] = { 4,4, 8,8, 2,10, 10,2, 10,2, 10,2 }; static unsigned sample_locs_6x[12] = { 3,1, 7,3, 11,5, 1,7, 5,9, 9,10 }; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; unsigned i, num_cbufs = fb->nr_cbufs; unsigned mspos0, mspos1; CS_LOCALS(r300); /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be * marked as UNUSED in the US block. */ if (r300->fb_multiwrite) { num_cbufs = MIN2(num_cbufs, 1); } BEGIN_CS(size); /* Colorbuffer format in the US block. * (must be written after unpipelined regs) */ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); for (i = 0; i < num_cbufs; i++) { OUT_CS(r300_surface(r300_get_nonnull_cb(fb, i))->format); } for (; i < 1; i++) { OUT_CS(R300_US_OUT_FMT_C4_8 | R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A); } for (; i < 4; i++) { OUT_CS(R300_US_OUT_FMT_UNUSED); } /* Set sample positions. It depends on the framebuffer sample count. * These are pipelined regs and as such cannot be moved to the AA state. */ switch (r300->num_samples) { default: mspos0 = r300_get_mspos(0, sample_locs_1x); mspos1 = r300_get_mspos(1, sample_locs_1x); break; case 2: mspos0 = r300_get_mspos(0, sample_locs_2x); mspos1 = r300_get_mspos(1, sample_locs_2x); break; case 4: mspos0 = r300_get_mspos(0, sample_locs_4x); mspos1 = r300_get_mspos(1, sample_locs_4x); break; case 6: mspos0 = r300_get_mspos(0, sample_locs_6x); mspos1 = r300_get_mspos(1, sample_locs_6x); break; } OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); OUT_CS(mspos0); OUT_CS(mspos1); END_CS; }
static int radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, struct pipe_fence_handle **fence) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; switch (cs->ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ if (cs->ws->info.chip_class <= SI) { while (rcs->current.cdw & 7) OUT_CS(&cs->base, 0xf0000000); /* NOP packet */ } else { while (rcs->current.cdw & 7) OUT_CS(&cs->base, 0x00000000); /* NOP packet */ } break; case RING_GFX: /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements * r6xx, requires at least 4 dw alignment to avoid a hw bug. */ if (cs->ws->info.gfx_ib_pad_with_type2) { while (rcs->current.cdw & 7) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ } else { while (rcs->current.cdw & 7) OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ } break; case RING_UVD: while (rcs->current.cdw & 15) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ break; default: break; } if (rcs->current.cdw > rcs->current.max_dw) { fprintf(stderr, "radeon: command stream overflowed\n"); } if (fence) { if (cs->next_fence) { radeon_fence_reference(fence, cs->next_fence); } else { radeon_fence_reference(fence, NULL); *fence = radeon_cs_create_fence(rcs); } } radeon_fence_reference(&cs->next_fence, NULL); radeon_drm_cs_sync_flush(rcs); /* Swap command streams. */ tmp = cs->csc; cs->csc = cs->cst; cs->cst = tmp; /* If the CS is not empty or overflowed, emit it in a separate thread. */ if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) { unsigned i, crelocs; crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.current.cdw; for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls); } switch (cs->ring_type) { case RING_DMA: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; if (cs->ws->info.has_virtual_memory) { cs->cst->flags[0] |= RADEON_CS_USE_VM; } break; case RING_UVD: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_UVD; cs->cst->cs.num_chunks = 3; break; case RING_VCE: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_VCE; cs->cst->cs.num_chunks = 3; break; default: case RING_GFX: case RING_COMPUTE: cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS; cs->cst->flags[1] = RADEON_CS_RING_GFX; cs->cst->cs.num_chunks = 3; if (cs->ws->info.has_virtual_memory) { cs->cst->flags[0] |= RADEON_CS_USE_VM; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_END_OF_FRAME) { cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; cs->cst->cs.num_chunks = 3; } if (cs->ring_type == RING_COMPUTE) { cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; cs->cst->cs.num_chunks = 3; } break; } if (util_queue_is_initialized(&cs->ws->cs_queue)) { util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed, radeon_drm_cs_emit_ioctl_oneshot, NULL); if (!(flags & RADEON_FLUSH_ASYNC)) radeon_drm_cs_sync_flush(rcs); } else { radeon_drm_cs_emit_ioctl_oneshot(cs, 0); } } else { radeon_cs_context_cleanup(cs->cst); } /* Prepare a new CS. */ cs->base.current.buf = cs->csc->buf; cs->base.current.cdw = 0; cs->base.used_vram = 0; cs->base.used_gart = 0; cs->ws->num_cs_flushes++; return 0; }
/* This functions is used to draw a rectangle for the blitter module. * * If we rendered a quad, the pixels on the main diagonal * would be computed and stored twice, which makes the clear/copy codepaths * somewhat inefficient. Instead we use a rectangular point sprite. */ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, unsigned x1, unsigned y1, unsigned x2, unsigned y2, float depth, enum blitter_attrib_type type, const float attrib[4]) { struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter)); unsigned last_sprite_coord_enable = r300->sprite_coord_enable; unsigned width = x2 - x1; unsigned height = y2 - y1; unsigned vertex_size = type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4; unsigned dwords = 13 + vertex_size + (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0); const float zeros[4] = {0, 0, 0, 0}; CS_LOCALS(r300); if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) r300->sprite_coord_enable = 1; r300_update_derived_state(r300); /* Mark some states we don't care about as non-dirty. */ r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) goto done; DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); BEGIN_CS(dwords); /* Set up GA. */ OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16)); if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { /* Set up the GA to generate texcoords. */ OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT)); OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); OUT_CS_32F(attrib[0]); OUT_CS_32F(attrib[3]); OUT_CS_32F(attrib[2]); OUT_CS_32F(attrib[1]); } /* Set up VAP controls. */ OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); OUT_CS(1); OUT_CS(0); /* Draw. */ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) | R300_VAP_VF_CNTL__PRIM_POINTS); OUT_CS_32F(x1 + width * 0.5f); OUT_CS_32F(y1 + height * 0.5f); OUT_CS_32F(depth); OUT_CS_32F(1); if (vertex_size == 8) { if (!attrib) attrib = zeros; OUT_CS_TABLE(attrib, 4); } END_CS; done: /* Restore the state. */ r300->clip_state.dirty = TRUE; r300->rs_state.dirty = TRUE; r300->viewport_state.dirty = TRUE; r300->sprite_coord_enable = last_sprite_coord_enable; }
static void r300_draw_arrays_immediate(struct r300_context *r300, const struct pipe_draw_info *info) { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; unsigned vertex_element_count = r300->velems->count; unsigned i, v, vbi; /* Size of the vertex, in dwords. */ unsigned vertex_size = r300->velems->vertex_size_dwords; /* The number of dwords for this draw operation. */ unsigned dwords = 4 + info->count * vertex_size; /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; /* Stride to the same attrib in the next vertex in the vertex buffer, * in dwords. */ unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; uint32_t* mapelem[PIPE_MAX_ATTRIBS]; CS_LOCALS(r300); if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) return; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; size[i] = r300->velems->format_size[i] / 4; vbi = velem->vertex_buffer_index; vbuf = &r300->vertex_buffer[vbi]; stride[i] = vbuf->stride / 4; /* Map the buffer. */ if (!map[vbi]) { map[vbi] = (uint32_t*)r300->rws->buffer_map( r300_resource(vbuf->buffer)->cs_buf, r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start; } mapelem[i] = map[vbi] + (velem->src_offset / 4); } r300_emit_draw_init(r300, info->mode, info->count-1); BEGIN_CS(dwords); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) | r300_translate_primitive(info->mode)); /* Emit vertices. */ for (v = 0; v < info->count; v++) { for (i = 0; i < vertex_element_count; i++) { OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]); } } END_CS; }
static void r300_emit_draw_arrays_immediate(struct r300_context *r300, unsigned mode, unsigned start, unsigned count) { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; unsigned vertex_element_count = r300->velems->count; unsigned i, v, vbi; /* Size of the vertex, in dwords. */ unsigned vertex_size = r300->velems->vertex_size_dwords; /* The number of dwords for this draw operation. */ unsigned dwords = 9 + count * vertex_size; /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; /* Stride to the same attrib in the next vertex in the vertex buffer, * in dwords. */ unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ uint32_t* map[PIPE_MAX_ATTRIBS]; uint32_t* mapelem[PIPE_MAX_ATTRIBS]; struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0}; CS_LOCALS(r300); if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0)) return; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; size[i] = r300->velems->hw_format_size[i] / 4; vbi = velem->vertex_buffer_index; vbuf = &r300->vertex_buffer[vbi]; stride[i] = vbuf->stride / 4; /* Map the buffer. */ if (!transfer[vbi]) { map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, vbuf->buffer, PIPE_TRANSFER_READ, &transfer[vbi]); map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; } mapelem[i] = map[vbi] + (velem->src_offset / 4); } BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); OUT_CS(count - 1); OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); /* Emit vertices. */ for (v = 0; v < count; v++) { for (i = 0; i < vertex_element_count; i++) { OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]); } } END_CS; /* Unmap buffers. */ for (i = 0; i < vertex_element_count; i++) { vbi = r300->velems->velem[i].vertex_buffer_index; if (transfer[vbi]) { vbuf = &r300->vertex_buffer[vbi]; pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]); transfer[vbi] = NULL; } } }
static void r300_draw_elements_immediate(struct r300_context *r300, const struct pipe_draw_info *info) { const uint8_t *ptr1; const uint16_t *ptr2; const uint32_t *ptr4; unsigned index_size = r300->index_buffer.index_size; unsigned i, count_dwords = index_size == 4 ? info->count : (info->count + 1) / 2; CS_LOCALS(r300); /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS | PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1)) return; r300_emit_draw_init(r300, info->mode, info->max_index); BEGIN_CS(2 + count_dwords); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords); switch (index_size) { case 1: ptr1 = (uint8_t*)r300->index_buffer.user_buffer; ptr1 += info->start; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | r300_translate_primitive(info->mode)); if (info->index_bias && !r300->screen->caps.is_r500) { for (i = 0; i < info->count-1; i += 2) OUT_CS(((ptr1[i+1] + info->index_bias) << 16) | (ptr1[i] + info->index_bias)); if (info->count & 1) OUT_CS(ptr1[i] + info->index_bias); } else { for (i = 0; i < info->count-1; i += 2) OUT_CS(((ptr1[i+1]) << 16) | (ptr1[i] )); if (info->count & 1) OUT_CS(ptr1[i]); } break; case 2: ptr2 = (uint16_t*)r300->index_buffer.user_buffer; ptr2 += info->start; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | r300_translate_primitive(info->mode)); if (info->index_bias && !r300->screen->caps.is_r500) { for (i = 0; i < info->count-1; i += 2) OUT_CS(((ptr2[i+1] + info->index_bias) << 16) | (ptr2[i] + info->index_bias)); if (info->count & 1) OUT_CS(ptr2[i] + info->index_bias); } else { OUT_CS_TABLE(ptr2, count_dwords); } break; case 4: ptr4 = (uint32_t*)r300->index_buffer.user_buffer; ptr4 += info->start; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) | R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300_translate_primitive(info->mode)); if (info->index_bias && !r300->screen->caps.is_r500) { for (i = 0; i < info->count; i++) OUT_CS(ptr4[i] + info->index_bias); } else { OUT_CS_TABLE(ptr4, count_dwords); } break; } END_CS; }
static void r300_emit_draw_elements(struct r300_context *r300, struct pipe_resource* indexBuffer, unsigned indexSize, unsigned minIndex, unsigned maxIndex, unsigned mode, unsigned start, unsigned count) { uint32_t count_dwords; uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); boolean alt_num_verts = count > 65535; CS_LOCALS(r300); if (count >= (1 << 24)) { fprintf(stderr, "r300: Got a huge number of vertices: %i, " "refusing to render.\n", count); return; } maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", count, minIndex, maxIndex); BEGIN_CS(13 + (alt_num_verts ? 2 : 0)); if (alt_num_verts) { OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); OUT_CS(maxIndex); OUT_CS(minIndex); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { count_dwords = count; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300_translate_primitive(mode) | (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } else { count_dwords = (count + 1) / 2; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode) | (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); } /* INDX_BUFFER is a truly special packet3. * Unlike most other packet3, where the offset is after the count, * the order is reversed, so the relocation ends up carrying the * size of the indexbuf instead of the offset. */ OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); OUT_CS_BUF_RELOC(indexBuffer, count_dwords, r300_buffer(indexBuffer)->domain, 0); END_CS; }
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; switch (cs->base.ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ if (cs->ws->info.chip_class <= SI) { while (rcs->cdw & 7) OUT_CS(&cs->base, 0xf0000000); /* NOP packet */ } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x00000000); /* NOP packet */ } break; case RING_GFX: /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements * r6xx, requires at least 4 dw alignment to avoid a hw bug. */ if (flags & RADEON_FLUSH_COMPUTE) { if (cs->ws->info.chip_class <= SI) { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ } } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ } break; } if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) { fprintf(stderr, "radeon: command stream overflowed\n"); } radeon_drm_cs_sync_flush(rcs); /* Flip command streams. */ tmp = cs->csc; cs->csc = cs->cst; cs->cst = tmp; cs->cst->cs_trace_id = cs_trace_id; /* If the CS is not empty or overflowed, emit it in a separate thread. */ if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) { unsigned i, crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.cdw; for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } switch (cs->base.ring_type) { case RING_DMA: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; } break; case RING_UVD: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_UVD; cs->cst->cs.num_chunks = 3; break; default: case RING_GFX: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_GFX; cs->cst->cs.num_chunks = 2; if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; cs->cst->cs.num_chunks = 3; } if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_END_OF_FRAME) { cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_COMPUTE) { cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; cs->cst->cs.num_chunks = 3; } break; } if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) { cs->flush_started = 1; radeon_drm_ws_queue_cs(cs->ws, cs); } else { pipe_mutex_lock(cs->ws->cs_stack_lock); if (cs->ws->thread) { while (p_atomic_read(&cs->ws->ncs)) { pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock); } } pipe_mutex_unlock(cs->ws->cs_stack_lock); radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst); } } else { radeon_cs_context_cleanup(cs->cst); } /* Prepare a new CS. */ cs->base.buf = cs->csc->buf; cs->base.cdw = 0; }
static void r300_render_draw_elements(struct vbuf_render* render, const ushort* indices, uint count) { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; int i; unsigned end_cs_dwords; unsigned max_index = (r300->draw_vbo_size - r300->draw_vbo_offset) / (r300render->r300->vertex_info.size * 4) - 1; unsigned short_count; unsigned free_dwords; CS_LOCALS(r300); DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count); if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 256, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 0, 0, -1)) return; } /* Below we manage the CS space manually because there may be more * indices than it can fit in CS. */ end_cs_dwords = r300_get_num_cs_end_dwords(r300); while (count) { free_dwords = RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw; short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2); BEGIN_CS(6 + (short_count+1)/2); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, r300render->prim)); OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (short_count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (short_count << 16) | r300render->hwprim); for (i = 0; i < short_count-1; i += 2) { OUT_CS(indices[i+1] << 16 | indices[i]); } if (short_count % 2) { OUT_CS(indices[short_count-1]); } END_CS; /* OK now subtract the emitted indices and see if we need to emit * another draw packet. */ indices += short_count; count -= short_count; if (count) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED, NULL, 256, 0, 0, -1)) return; end_cs_dwords = r300_get_num_cs_end_dwords(r300); } } r300->draw_first_emitted = TRUE; }