Beispiel #1
0
void r300_emit_vertex_stream_state(struct r300_context* r300,
                                   unsigned size, void* state)
{
    struct r300_vertex_stream_state *streams =
        (struct r300_vertex_stream_state*)state;
    unsigned i;
    CS_LOCALS(r300);

    if (DBG_ON(r300, DBG_PSC)) {
        fprintf(stderr, "r300: PSC emit:\n");

        for (i = 0; i < streams->count; i++) {
            fprintf(stderr, "    : prog_stream_cntl%d: 0x%08x\n", i,
                   streams->vap_prog_stream_cntl[i]);
        }

        for (i = 0; i < streams->count; i++) {
            fprintf(stderr, "    : prog_stream_cntl_ext%d: 0x%08x\n", i,
                   streams->vap_prog_stream_cntl_ext[i]);
        }
    }

    BEGIN_CS(size);
    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);
    OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);
    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);
    OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count);
    END_CS;
}
Beispiel #2
0
void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
{
    struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state;
    struct r300_vertex_program_code* code = &vs->code;
    struct r300_screen* r300screen = r300->screen;
    unsigned instruction_count = code->length / 4;

    unsigned vtx_mem_size = r300screen->caps.is_r500 ? 128 : 72;
    unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1);
    unsigned output_count = MAX2(util_bitcount(code->OutputsWritten), 1);
    unsigned temp_count = MAX2(code->num_temporaries, 1);

    unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count,
                                  vtx_mem_size / output_count, 10);
    unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5);

    CS_LOCALS(r300);

    BEGIN_CS(size);

    /* R300_VAP_PVS_CODE_CNTL_0
     * R300_VAP_PVS_CONST_CNTL
     * R300_VAP_PVS_CODE_CNTL_1
     * See the r5xx docs for instructions on how to use these. */
    OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) |
	       R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
	       R300_PVS_LAST_INST(instruction_count - 1));
    OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, instruction_count - 1);

    OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
    OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length);
    OUT_CS_TABLE(code->body.d, code->length);

    OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) |
            R300_PVS_NUM_CNTLRS(pvs_num_controllers) |
            R300_PVS_NUM_FPUS(r300screen->caps.num_vert_fpus) |
            R300_PVS_VF_MAX_VTX_NUM(12) |
            (r300->clip_halfz ? R300_DX_CLIP_SPACE_DEF : 0) |
            (r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));

    /* Emit flow control instructions.  Even if there are no fc instructions,
     * we still need to write the registers to make sure they are cleared. */
    OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
    if (r300screen->caps.is_r500) {
        OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2);
        OUT_CS_TABLE(code->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2);
    } else {
        OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS);
        OUT_CS_TABLE(code->fc_op_addrs.r300, R300_VS_MAX_FC_OPS);
    }
    OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS);
    OUT_CS_TABLE(code->fc_loop_index, R300_VS_MAX_FC_OPS);

    END_CS;
}
Beispiel #3
0
void r300_emit_rs_block_state(struct r300_context* r300,
                              unsigned size, void* state)
{
    struct r300_rs_block* rs = (struct r300_rs_block*)state;
    unsigned i;
    /* It's the same for both INST and IP tables */
    unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1;
    CS_LOCALS(r300);

    if (DBG_ON(r300, DBG_RS_BLOCK)) {
        r500_dump_rs_block(rs);

        fprintf(stderr, "r300: RS emit:\n");

        for (i = 0; i < count; i++)
            fprintf(stderr, "    : ip %d: 0x%08x\n", i, rs->ip[i]);

        for (i = 0; i < count; i++)
            fprintf(stderr, "    : inst %d: 0x%08x\n", i, rs->inst[i]);

        fprintf(stderr, "    : count: 0x%08x inst_count: 0x%08x\n",
            rs->count, rs->inst_count);
    }

    BEGIN_CS(size);
    OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
    OUT_CS(rs->vap_vtx_state_cntl);
    OUT_CS(rs->vap_vsm_vtx_assm);
    OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
    OUT_CS(rs->vap_out_vtx_fmt[0]);
    OUT_CS(rs->vap_out_vtx_fmt[1]);
    OUT_CS_REG_SEQ(R300_GB_ENABLE, 1);
    OUT_CS(rs->gb_enable);

    if (r300->screen->caps.is_r500) {
        OUT_CS_REG_SEQ(R500_RS_IP_0, count);
    } else {
        OUT_CS_REG_SEQ(R300_RS_IP_0, count);
    }
    OUT_CS_TABLE(rs->ip, count);

    OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
    OUT_CS(rs->count);
    OUT_CS(rs->inst_count);

    if (r300->screen->caps.is_r500) {
        OUT_CS_REG_SEQ(R500_RS_INST_0, count);
    } else {
        OUT_CS_REG_SEQ(R300_RS_INST_0, count);
    }
    OUT_CS_TABLE(rs->inst, count);
    END_CS;
}
Beispiel #4
0
void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
{
    struct r300_dsa_state* dsa = (struct r300_dsa_state*)state;
    struct pipe_framebuffer_state* fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    boolean is_r500 = r300->screen->caps.is_r500;
    CS_LOCALS(r300);
    uint32_t alpha_func = dsa->alpha_function;

    /* Choose the alpha ref value between 8-bit (FG_ALPHA_FUNC.AM_VAL) and
     * 16-bit (FG_ALPHA_VALUE). */
    if (is_r500 && (alpha_func & R300_FG_ALPHA_FUNC_ENABLE)) {
        struct pipe_surface *cb = fb->nr_cbufs ? r300_get_nonnull_cb(fb, 0) : NULL;

        if (cb &&
            (cb->format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
             cb->format == PIPE_FORMAT_R16G16B16X16_FLOAT)) {
            alpha_func |= R500_FG_ALPHA_FUNC_FP16_ENABLE;
        } else {
            alpha_func |= R500_FG_ALPHA_FUNC_8BIT;
        }
    }

    /* Setup alpha-to-coverage. */
    if (r300->alpha_to_coverage && r300->msaa_enable) {
        /* Always set 3/6, it improves precision even for 2x and 4x MSAA. */
        alpha_func |= R300_FG_ALPHA_FUNC_MASK_ENABLE |
                      R300_FG_ALPHA_FUNC_CFG_3_OF_6;
    }

    BEGIN_CS(size);
    OUT_CS_REG(R300_FG_ALPHA_FUNC, alpha_func);
    OUT_CS_TABLE(fb->zsbuf ? &dsa->cb_begin : dsa->cb_zb_no_readwrite, size-2);
    END_CS;
}
Beispiel #5
0
void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state)
{
    struct r300_rs_state* rs = state;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_TABLE(rs->cb_main, RS_STATE_MAIN_SIZE);
    if (rs->polygon_offset_enable) {
        if (r300->zbuffer_bpp == 16) {
            OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5);
        } else {
            OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5);
        }
    }
    END_CS;
}
Beispiel #6
0
void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state)
{
    struct r300_fragment_shader *fs = r300_fs(r300);
    struct rc_constant_list *constants = &fs->shader->code.constants;
    unsigned i;
    unsigned count = fs->shader->rc_state_count;
    unsigned first = fs->shader->externals_count;
    unsigned end = constants->Count;
    CS_LOCALS(r300);

    if (count == 0)
        return;

    BEGIN_CS(size);
    for(i = first; i < end; ++i) {
        if (constants->Constants[i].Type == RC_CONSTANT_STATE) {
            float data[4];

            get_rc_constant_state(data, r300, &constants->Constants[i]);

            OUT_CS_REG(R500_GA_US_VECTOR_INDEX,
                       R500_GA_US_VECTOR_INDEX_TYPE_CONST |
                       (i & R500_GA_US_VECTOR_INDEX_MASK));
            OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
            OUT_CS_TABLE(data, 4);
        }
    }
    END_CS;
}
Beispiel #7
0
void r300_emit_vs_constants(struct r300_context* r300,
                            unsigned size, void *state)
{
    unsigned count =
        ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count;
    struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
    struct r300_vertex_shader *vs = (struct r300_vertex_shader*)r300->vs_state.state;
    unsigned i;
    int imm_first = vs->externals_count;
    int imm_end = vs->code.constants.Count;
    int imm_count = vs->immediates_count;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_REG(R300_VAP_PVS_CONST_CNTL,
               R300_PVS_CONST_BASE_OFFSET(buf->buffer_base) |
               R300_PVS_MAX_CONST_ADDR(MAX2(imm_end - 1, 0)));
    if (vs->externals_count) {
        OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
                   (r300->screen->caps.is_r500 ?
                   R500_PVS_CONST_START : R300_PVS_CONST_START) + buf->buffer_base);
        OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
        if (buf->remap_table){
            for (i = 0; i < count; i++) {
                uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
                OUT_CS_TABLE(data, 4);
            }
        } else {
            OUT_CS_TABLE(buf->ptr, count * 4);
        }
    }

    /* Emit immediates. */
    if (imm_count) {
        OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
                   (r300->screen->caps.is_r500 ?
                   R500_PVS_CONST_START : R300_PVS_CONST_START) +
                   buf->buffer_base + imm_first);
        OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4);
        for (i = imm_first; i < imm_end; i++) {
            const float *data = vs->code.constants.Constants[i].u.Immediate;
            OUT_CS_TABLE(data, 4);
        }
    }
    END_CS;
}
Beispiel #8
0
void r300_emit_viewport_state(struct r300_context* r300,
                              unsigned size, void* state)
{
    struct r300_viewport_state* viewport = (struct r300_viewport_state*)state;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
    OUT_CS_TABLE(&viewport->xscale, 6);
    OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);
    END_CS;
}
Beispiel #9
0
void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)
{
    struct r300_fragment_shader *fs = r300_fs(r300);
    struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
    unsigned count = fs->shader->externals_count;
    CS_LOCALS(r300);

    if (count == 0)
        return;

    BEGIN_CS(size);
    OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
    OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4);
    if (buf->remap_table){
        for (unsigned i = 0; i < count; i++) {
            uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
            OUT_CS_TABLE(data, 4);
        }
    } else {
        OUT_CS_TABLE(buf->ptr, count * 4);
    }
    END_CS;
}
Beispiel #10
0
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
{
    struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
    struct pipe_framebuffer_state* fb =
            (struct pipe_framebuffer_state*)r300->fb_state.state;
    uint32_t height = fb->height;
    uint32_t width = fb->width;
    CS_LOCALS(r300);

    if (r300->cbzb_clear) {
        struct r300_surface *surf = r300_surface(fb->cbufs[0]);

        height = surf->cbzb_height;
        width = surf->cbzb_width;
    }

    DBG(r300, DBG_SCISSOR,
	"r300: Scissor width: %i, height: %i, CBZB clear: %s\n",
	width, height, r300->cbzb_clear ? "YES" : "NO");

    BEGIN_CS(size);

    /* Set up scissors.
     * By writing to the SC registers, SC & US assert idle. */
    OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
    if (r300->screen->caps.is_r500) {
        OUT_CS(0);
        OUT_CS(((width  - 1) << R300_SCISSORS_X_SHIFT) |
               ((height - 1) << R300_SCISSORS_Y_SHIFT));
    } else {
        OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
               (1440 << R300_SCISSORS_Y_SHIFT));
        OUT_CS(((width  + 1440-1) << R300_SCISSORS_X_SHIFT) |
               ((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
    }

    /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
    OUT_CS_TABLE(gpuflush->cb_flush_clean, 6);
    END_CS;
}
Beispiel #11
0
static void r300_draw_elements_immediate(struct r300_context *r300,
                                         const struct pipe_draw_info *info)
{
    const uint8_t *ptr1;
    const uint16_t *ptr2;
    const uint32_t *ptr4;
    unsigned index_size = r300->index_buffer.index_size;
    unsigned i, count_dwords = index_size == 4 ? info->count :
                                                 (info->count + 1) / 2;
    CS_LOCALS(r300);

    /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */
    if (!r300_prepare_for_rendering(r300,
            PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS |
            PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1))
        return;

    r300_emit_draw_init(r300, info->mode, info->max_index);

    BEGIN_CS(2 + count_dwords);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords);

    switch (index_size) {
    case 1:
        ptr1 = (uint8_t*)r300->index_buffer.user_buffer;
        ptr1 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr1[i+1] + info->index_bias) << 16) |
                        (ptr1[i]   + info->index_bias));

            if (info->count & 1)
                OUT_CS(ptr1[i] + info->index_bias);
        } else {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr1[i+1]) << 16) |
                        (ptr1[i]  ));

            if (info->count & 1)
                OUT_CS(ptr1[i]);
        }
        break;

    case 2:
        ptr2 = (uint16_t*)r300->index_buffer.user_buffer;
        ptr2 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr2[i+1] + info->index_bias) << 16) |
                        (ptr2[i]   + info->index_bias));

            if (info->count & 1)
                OUT_CS(ptr2[i] + info->index_bias);
        } else {
            OUT_CS_TABLE(ptr2, count_dwords);
        }
        break;

    case 4:
        ptr4 = (uint32_t*)r300->index_buffer.user_buffer;
        ptr4 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count; i++)
                OUT_CS(ptr4[i] + info->index_bias);
        } else {
            OUT_CS_TABLE(ptr4, count_dwords);
        }
        break;
    }
    END_CS;
}
Beispiel #12
0
static void r300_draw_arrays_immediate(struct r300_context *r300,
                                       const struct pipe_draw_info *info)
{
    struct pipe_vertex_element* velem;
    struct pipe_vertex_buffer* vbuf;
    unsigned vertex_element_count = r300->velems->count;
    unsigned i, v, vbi;

    /* Size of the vertex, in dwords. */
    unsigned vertex_size = r300->velems->vertex_size_dwords;

    /* The number of dwords for this draw operation. */
    unsigned dwords = 4 + info->count * vertex_size;

    /* Size of the vertex element, in dwords. */
    unsigned size[PIPE_MAX_ATTRIBS];

    /* Stride to the same attrib in the next vertex in the vertex buffer,
     * in dwords. */
    unsigned stride[PIPE_MAX_ATTRIBS];

    /* Mapped vertex buffers. */
    uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
    uint32_t* mapelem[PIPE_MAX_ATTRIBS];

    CS_LOCALS(r300);

    if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
        return;

    /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        velem = &r300->velems->velem[i];
        size[i] = r300->velems->format_size[i] / 4;
        vbi = velem->vertex_buffer_index;
        vbuf = &r300->vertex_buffer[vbi];
        stride[i] = vbuf->stride / 4;

        /* Map the buffer. */
        if (!map[vbi]) {
            map[vbi] = (uint32_t*)r300->rws->buffer_map(
                r300_resource(vbuf->buffer)->cs_buf,
                r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED);
            map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start;
        }
        mapelem[i] = map[vbi] + (velem->src_offset / 4);
    }

    r300_emit_draw_init(r300, info->mode, info->count-1);

    BEGIN_CS(dwords);
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) |
            r300_translate_primitive(info->mode));

    /* Emit vertices. */
    for (v = 0; v < info->count; v++) {
        for (i = 0; i < vertex_element_count; i++) {
            OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]);
        }
    }
    END_CS;
}
Beispiel #13
0
/* This functions is used to draw a rectangle for the blitter module.
 *
 * If we rendered a quad, the pixels on the main diagonal
 * would be computed and stored twice, which makes the clear/copy codepaths
 * somewhat inefficient. Instead we use a rectangular point sprite. */
void r300_blitter_draw_rectangle(struct blitter_context *blitter,
                                 int x1, int y1, int x2, int y2,
                                 float depth,
                                 enum blitter_attrib_type type,
                                 const union pipe_color_union *attrib)
{
    struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
    unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
    unsigned width = x2 - x1;
    unsigned height = y2 - y1;
    unsigned vertex_size =
            type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
    unsigned dwords = 13 + vertex_size +
                      (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
    static const union pipe_color_union zeros;
    CS_LOCALS(r300);

    /* XXX workaround for a lockup in MSAA resolve on SWTCL chipsets, this
     * function most probably doesn't handle type=NONE correctly */
    if (!r300->screen->caps.has_tcl && type == UTIL_BLITTER_ATTRIB_NONE) {
        util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib);
        return;
    }

    if (r300->skip_rendering)
        return;

    if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
        r300->sprite_coord_enable = 1;

    r300_update_derived_state(r300);

    /* Mark some states we don't care about as non-dirty. */
    r300->viewport_state.dirty = FALSE;

    if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
        goto done;

    DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");

    BEGIN_CS(dwords);
    /* Set up GA. */
    OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));

    if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
        /* Set up the GA to generate texcoords. */
        OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
                   (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
        OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
        OUT_CS_32F(attrib->f[0]);
        OUT_CS_32F(attrib->f[3]);
        OUT_CS_32F(attrib->f[2]);
        OUT_CS_32F(attrib->f[1]);
    }

    /* Set up VAP controls. */
    OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
    OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(1);
    OUT_CS(0);

    /* Draw. */
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) |
           R300_VAP_VF_CNTL__PRIM_POINTS);

    OUT_CS_32F(x1 + width * 0.5f);
    OUT_CS_32F(y1 + height * 0.5f);
    OUT_CS_32F(depth);
    OUT_CS_32F(1);

    if (vertex_size == 8) {
        if (!attrib)
            attrib = &zeros;
        OUT_CS_TABLE(attrib->f, 4);
    }
    END_CS;

done:
    /* Restore the state. */
    r300_mark_atom_dirty(r300, &r300->rs_state);
    r300_mark_atom_dirty(r300, &r300->viewport_state);

    r300->sprite_coord_enable = last_sprite_coord_enable;
}
static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
                                            unsigned mode,
                                            unsigned start,
                                            unsigned count)
{
    struct pipe_vertex_element* velem;
    struct pipe_vertex_buffer* vbuf;
    unsigned vertex_element_count = r300->velems->count;
    unsigned i, v, vbi;

    /* Size of the vertex, in dwords. */
    unsigned vertex_size = r300->velems->vertex_size_dwords;

    /* The number of dwords for this draw operation. */
    unsigned dwords = 9 + count * vertex_size;

    /* Size of the vertex element, in dwords. */
    unsigned size[PIPE_MAX_ATTRIBS];

    /* Stride to the same attrib in the next vertex in the vertex buffer,
     * in dwords. */
    unsigned stride[PIPE_MAX_ATTRIBS];

    /* Mapped vertex buffers. */
    uint32_t* map[PIPE_MAX_ATTRIBS];
    uint32_t* mapelem[PIPE_MAX_ATTRIBS];
    struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0};

    CS_LOCALS(r300);

    if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0))
        return;

    /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        velem = &r300->velems->velem[i];
        size[i] = r300->velems->hw_format_size[i] / 4;
        vbi = velem->vertex_buffer_index;
        vbuf = &r300->vertex_buffer[vbi];
        stride[i] = vbuf->stride / 4;

        /* Map the buffer. */
        if (!transfer[vbi]) {
            map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
                                                  vbuf->buffer,
                                                  PIPE_TRANSFER_READ,
						  &transfer[vbi]);
            map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start;
        }
        mapelem[i] = map[vbi] + (velem->src_offset / 4);
    }

    BEGIN_CS(dwords);
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, mode));
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(count - 1);
    OUT_CS(0);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
            r300_translate_primitive(mode));

    /* Emit vertices. */
    for (v = 0; v < count; v++) {
        for (i = 0; i < vertex_element_count; i++) {
            OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]);
        }
    }
    END_CS;

    /* Unmap buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        vbi = r300->velems->velem[i].vertex_buffer_index;

        if (transfer[vbi]) {
            vbuf = &r300->vertex_buffer[vbi];
            pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]);
            transfer[vbi] = NULL;
        }
    }
}
/* This functions is used to draw a rectangle for the blitter module.
 *
 * If we rendered a quad, the pixels on the main diagonal
 * would be computed and stored twice, which makes the clear/copy codepaths
 * somewhat inefficient. Instead we use a rectangular point sprite. */
static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
                                        unsigned x1, unsigned y1,
                                        unsigned x2, unsigned y2,
                                        float depth,
                                        enum blitter_attrib_type type,
                                        const float attrib[4])
{
    struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
    unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
    unsigned width = x2 - x1;
    unsigned height = y2 - y1;
    unsigned vertex_size =
            type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
    unsigned dwords = 13 + vertex_size +
                      (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
    const float zeros[4] = {0, 0, 0, 0};
    CS_LOCALS(r300);

    if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
        r300->sprite_coord_enable = 1;

    r300_update_derived_state(r300);

    /* Mark some states we don't care about as non-dirty. */
    r300->clip_state.dirty = FALSE;
    r300->viewport_state.dirty = FALSE;

    if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0))
        goto done;

    DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");

    BEGIN_CS(dwords);
    /* Set up GA. */
    OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));

    if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
        /* Set up the GA to generate texcoords. */
        OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
                   (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
        OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
        OUT_CS_32F(attrib[0]);
        OUT_CS_32F(attrib[3]);
        OUT_CS_32F(attrib[2]);
        OUT_CS_32F(attrib[1]);
    }

    /* Set up VAP controls. */
    OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
    OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(1);
    OUT_CS(0);

    /* Draw. */
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) |
           R300_VAP_VF_CNTL__PRIM_POINTS);

    OUT_CS_32F(x1 + width * 0.5f);
    OUT_CS_32F(y1 + height * 0.5f);
    OUT_CS_32F(depth);
    OUT_CS_32F(1);

    if (vertex_size == 8) {
        if (!attrib)
            attrib = zeros;
        OUT_CS_TABLE(attrib, 4);
    }
    END_CS;

done:
    /* Restore the state. */
    r300->clip_state.dirty = TRUE;
    r300->rs_state.dirty = TRUE;
    r300->viewport_state.dirty = TRUE;

    r300->sprite_coord_enable = last_sprite_coord_enable;
}