예제 #1
0
void r300_emit_scissor_state(struct r300_context* r300,
                             unsigned size, void* state)
{
    struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state;
    CS_LOCALS(r300);

    BEGIN_CS(size);
    OUT_CS_REG_SEQ(R300_SC_CLIPRECT_TL_0, 2);
    if (r300->screen->caps.is_r500) {
        OUT_CS((scissor->minx << R300_CLIPRECT_X_SHIFT) |
               (scissor->miny << R300_CLIPRECT_Y_SHIFT));
        OUT_CS(((scissor->maxx - 1) << R300_CLIPRECT_X_SHIFT) |
               ((scissor->maxy - 1) << R300_CLIPRECT_Y_SHIFT));
    } else {
        OUT_CS(((scissor->minx + 1440) << R300_CLIPRECT_X_SHIFT) |
               ((scissor->miny + 1440) << R300_CLIPRECT_Y_SHIFT));
        OUT_CS(((scissor->maxx + 1440-1) << R300_CLIPRECT_X_SHIFT) |
               ((scissor->maxy + 1440-1) << R300_CLIPRECT_Y_SHIFT));
    }
    END_CS;
}
예제 #2
0
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
{
    struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
    struct pipe_framebuffer_state* fb =
            (struct pipe_framebuffer_state*)r300->fb_state.state;
    uint32_t height = fb->height;
    uint32_t width = fb->width;
    CS_LOCALS(r300);

    if (r300->cbzb_clear) {
        struct r300_surface *surf = r300_surface(fb->cbufs[0]);

        height = surf->cbzb_height;
        width = surf->cbzb_width;
    }

    DBG(r300, DBG_SCISSOR,
	"r300: Scissor width: %i, height: %i, CBZB clear: %s\n",
	width, height, r300->cbzb_clear ? "YES" : "NO");

    BEGIN_CS(size);

    /* Set up scissors.
     * By writing to the SC registers, SC & US assert idle. */
    OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
    if (r300->screen->caps.is_r500) {
        OUT_CS(0);
        OUT_CS(((width  - 1) << R300_SCISSORS_X_SHIFT) |
               ((height - 1) << R300_SCISSORS_Y_SHIFT));
    } else {
        OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
               (1440 << R300_SCISSORS_Y_SHIFT));
        OUT_CS(((width  + 1440-1) << R300_SCISSORS_X_SHIFT) |
               ((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
    }

    /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
    OUT_CS_TABLE(gpuflush->cb_flush_clean, 6);
    END_CS;
}
예제 #3
0
void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
{
    CS_LOCALS(r300);

    DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, "
            "vertex size %d\n", r300->vbo,
            r300->vertex_info.size);
    /* Set the pointer to our vertex buffer. The emitted values are this:
     * PACKET3 [3D_LOAD_VBPNTR]
     * COUNT   [1]
     * FORMAT  [size | stride << 8]
     * OFFSET  [offset into BO]
     * VBPNTR  [relocated BO]
     */
    BEGIN_CS(7);
    OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
    OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
    OUT_CS(r300->vertex_info.size |
            (r300->vertex_info.size << 8));
    OUT_CS(r300->draw_vbo_offset);
    OUT_CS(0);

    assert(r300->vbo_cs);
    OUT_CS(0xc0001000); /* PKT3_NOP */
    OUT_CS(r300->rws->cs_get_reloc(r300->cs, r300->vbo_cs) * 4);
    END_CS;
}
예제 #4
0
static void r300_emit_draw_arrays(struct r300_context *r300,
                                  unsigned mode,
                                  unsigned count)
{
    CS_LOCALS(r300);

    BEGIN_CS(4);
    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
           r300_translate_primitive(mode));
    END_CS;
}
예제 #5
0
void r300_emit_rs_block_state(struct r300_context* r300,
                              unsigned size, void* state)
{
    struct r300_rs_block* rs = (struct r300_rs_block*)state;
    unsigned i;
    /* It's the same for both INST and IP tables */
    unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1;
    CS_LOCALS(r300);

    if (DBG_ON(r300, DBG_RS_BLOCK)) {
        r500_dump_rs_block(rs);

        fprintf(stderr, "r300: RS emit:\n");

        for (i = 0; i < count; i++)
            fprintf(stderr, "    : ip %d: 0x%08x\n", i, rs->ip[i]);

        for (i = 0; i < count; i++)
            fprintf(stderr, "    : inst %d: 0x%08x\n", i, rs->inst[i]);

        fprintf(stderr, "    : count: 0x%08x inst_count: 0x%08x\n",
            rs->count, rs->inst_count);
    }

    BEGIN_CS(size);
    OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
    OUT_CS(rs->vap_vtx_state_cntl);
    OUT_CS(rs->vap_vsm_vtx_assm);
    OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
    OUT_CS(rs->vap_out_vtx_fmt[0]);
    OUT_CS(rs->vap_out_vtx_fmt[1]);
    OUT_CS_REG_SEQ(R300_GB_ENABLE, 1);
    OUT_CS(rs->gb_enable);

    if (r300->screen->caps.is_r500) {
        OUT_CS_REG_SEQ(R500_RS_IP_0, count);
    } else {
        OUT_CS_REG_SEQ(R300_RS_IP_0, count);
    }
    OUT_CS_TABLE(rs->ip, count);

    OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
    OUT_CS(rs->count);
    OUT_CS(rs->inst_count);

    if (r300->screen->caps.is_r500) {
        OUT_CS_REG_SEQ(R500_RS_INST_0, count);
    } else {
        OUT_CS_REG_SEQ(R300_RS_INST_0, count);
    }
    OUT_CS_TABLE(rs->inst, count);
    END_CS;
}
예제 #6
0
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_resource *tex;
    CS_LOCALS(r300);

    tex = r300_resource(fb->zsbuf->texture);

    BEGIN_CS(size);
    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
    OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2);
    OUT_CS(0);
    OUT_CS(tex->tex.zmask_dwords[fb->zsbuf->u.tex.level]);
    OUT_CS(0);
    END_CS;

    /* Mark the current zbuffer's zmask as in use. */
    r300->zmask_in_use = TRUE;
    r300_mark_atom_dirty(r300, &r300->hyperz_state);
}
예제 #7
0
static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags,
                                   struct pipe_fence_handle **fence)
{
    struct r300_atom *atom;

    r300_emit_hyperz_end(r300);
    r300_emit_query_end(r300);
    if (r300->screen->caps.is_r500)
        r500_emit_index_bias(r300, 0);

    /* The DDX doesn't set these regs. */
    if (r300->screen->info.drm_minor >= 6) {
        CS_LOCALS(r300);
        OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
        OUT_CS(0x66666666);
        OUT_CS(0x6666666);
    }

    r300->flush_counter++;
    r300->rws->cs_flush(r300->cs, flags, fence, 0);
    r300->dirty_hw = 0;

    /* New kitchen sink, baby. */
    foreach_atom(r300, atom) {
        if (atom->state || atom->allow_null_state) {
            r300_mark_atom_dirty(r300, atom);
        }
    }
    r300->vertex_arrays_dirty = TRUE;

    /* Unmark HWTCL state for SWTCL. */
    if (!r300->screen->caps.has_tcl) {
        r300->vs_state.dirty = FALSE;
        r300->vs_constants.dirty = FALSE;
        r300->clip_state.dirty = FALSE;
    }
}
예제 #8
0
static void r300_render_draw(struct vbuf_render* render,
                                   const ushort* indices,
                                   uint count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    int i;

    CS_LOCALS(r300);

    r300_emit_dirty_state(r300);

    BEGIN_CS(2 + (count+1)/2);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
           r300render->hwprim);
    for (i = 0; i < count-1; i += 2) {
        OUT_CS(indices[i+1] << 16 | indices[i]);
    }
    if (count % 2) {
        OUT_CS(indices[count-1]);
    }
    END_CS;
}
예제 #9
0
static void r300_render_draw_arrays(struct vbuf_render* render,
                                          unsigned start,
                                          unsigned count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;

    CS_LOCALS(r300);

    r300_emit_dirty_state(r300);

    DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);

    BEGIN_CS(2);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
           r300render->hwprim);
    END_CS;
}
예제 #10
0
static void r300_render_draw_arrays(struct vbuf_render* render,
                                    unsigned start,
                                    unsigned count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    uint8_t* ptr;
    unsigned i;
    unsigned dwords = 6;

    CS_LOCALS(r300);
    (void) i; (void) ptr;

    DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count);

    if (r300->draw_first_emitted) {
        if (!r300_prepare_for_rendering(r300,
                PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL,
                NULL, dwords, 0, 0, -1))
            return;
    } else {
        if (!r300_emit_states(r300,
                PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL,
                NULL, 0, 0, -1))
            return;
    }

    BEGIN_CS(dwords);
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, r300render->prim));
    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
           r300render->hwprim);
    END_CS;

    r300->draw_first_emitted = TRUE;
}
예제 #11
0
void r300_emit_fb_state_pipelined(struct r300_context *r300,
                                  unsigned size, void *state)
{
    struct pipe_framebuffer_state* fb =
            (struct pipe_framebuffer_state*)r300->fb_state.state;
    unsigned i, num_cbufs = fb->nr_cbufs;
    unsigned mspos0, mspos1;
    CS_LOCALS(r300);

    /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be
     * marked as UNUSED in the US block. */
    if (r300->fb_multiwrite) {
        num_cbufs = MIN2(num_cbufs, 1);
    }

    BEGIN_CS(size);

    /* Colorbuffer format in the US block.
     * (must be written after unpipelined regs) */
    OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
    for (i = 0; i < num_cbufs; i++) {
        OUT_CS(r300_surface(fb->cbufs[i])->format);
    }
    for (; i < 1; i++) {
        OUT_CS(R300_US_OUT_FMT_C4_8 |
               R300_C0_SEL_B | R300_C1_SEL_G |
               R300_C2_SEL_R | R300_C3_SEL_A);
    }
    for (; i < 4; i++) {
        OUT_CS(R300_US_OUT_FMT_UNUSED);
    }

    /* Multisampling. Depends on framebuffer sample count.
     * These are pipelined regs and as such cannot be moved
     * to the AA state. */
    mspos0 = 0x66666666;
    mspos1 = 0x6666666;

    if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
        /* Subsample placement. These may not be optimal. */
        switch (fb->cbufs[0]->texture->nr_samples) {
        case 2:
            mspos0 = 0x33996633;
            mspos1 = 0x6666663;
            break;
        case 3:
            mspos0 = 0x33936933;
            mspos1 = 0x6666663;
            break;
        case 4:
            mspos0 = 0x33939933;
            mspos1 = 0x3966663;
            break;
        case 6:
            mspos0 = 0x22a2aa22;
            mspos1 = 0x2a65672;
            break;
        default:
            debug_printf("r300: Bad number of multisamples!\n");
        }
    }

    OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
    OUT_CS(mspos0);
    OUT_CS(mspos1);
    END_CS;
}
예제 #12
0
static void r300_emit_draw_elements(struct r300_context *r300,
                                    struct pipe_resource* indexBuffer,
                                    unsigned indexSize,
                                    unsigned max_index,
                                    unsigned mode,
                                    unsigned start,
                                    unsigned count,
                                    uint16_t *imm_indices3)
{
    uint32_t count_dwords, offset_dwords;
    boolean alt_num_verts = count > 65535;
    CS_LOCALS(r300);

    if (count >= (1 << 24)) {
        fprintf(stderr, "r300: Got a huge number of vertices: %i, "
                "refusing to render (max_index: %i).\n", count, max_index);
        return;
    }

    DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, max %u\n",
        count, max_index);

    r300_emit_draw_init(r300, mode, max_index);

    /* If start is odd, render the first triangle with indices embedded
     * in the command stream. This will increase start by 3 and make it
     * even. We can then proceed without a fallback. */
    if (indexSize == 2 && (start & 1) &&
        mode == PIPE_PRIM_TRIANGLES) {
        BEGIN_CS(4);
        OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 2);
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (3 << 16) |
               R300_VAP_VF_CNTL__PRIM_TRIANGLES);
        OUT_CS(imm_indices3[1] << 16 | imm_indices3[0]);
        OUT_CS(imm_indices3[2]);
        END_CS;

        start += 3;
        count -= 3;
        if (!count)
           return;
    }

    offset_dwords = indexSize * start / sizeof(uint32_t);

    BEGIN_CS(8 + (alt_num_verts ? 2 : 0));
    if (alt_num_verts) {
        OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
    }
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
    if (indexSize == 4) {
        count_dwords = count;
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
               R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
               r300_translate_primitive(mode) |
               (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
    } else {
        count_dwords = (count + 1) / 2;
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
               r300_translate_primitive(mode) |
               (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
    }

    OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
           (0 << R300_INDX_BUFFER_SKIP_SHIFT));
    OUT_CS(offset_dwords << 2);
    OUT_CS(count_dwords);
    OUT_CS_RELOC(r300_resource(indexBuffer));
    END_CS;
}
예제 #13
0
파일: amdgpu_cs.c 프로젝트: jonasarrow/mesa
static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
                            unsigned flags,
                            struct pipe_fence_handle **fence,
                            uint32_t cs_trace_id)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   struct amdgpu_winsys *ws = cs->ctx->ws;

   switch (cs->base.ring_type) {
   case RING_DMA:
      /* pad DMA ring to 8 DWs */
      while (rcs->cdw & 7)
         OUT_CS(&cs->base, 0x00000000); /* NOP packet */
      break;
   case RING_GFX:
      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
      while (rcs->cdw & 7)
         OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
      break;
   case RING_UVD:
      while (rcs->cdw & 15)
         OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
      break;
   default:
      break;
   }

   if (rcs->cdw > rcs->max_dw) {
      fprintf(stderr, "amdgpu: command stream overflowed\n");
   }

   amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer,
		       RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);

   /* If the CS is not empty or overflowed.... */
   if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
      int r;

      r = amdgpu_bo_list_create(ws->dev, cs->num_buffers,
                                cs->handles, cs->flags,
                                &cs->request.resources);

      if (r) {
         fprintf(stderr, "amdgpu: resource list creation failed (%d)\n", r);
         cs->request.resources = NULL;
	 goto cleanup;
      }

      cs->ib.size = cs->base.cdw;
      cs->used_ib_space += cs->base.cdw * 4;

      amdgpu_cs_do_submission(cs, fence);

      /* Cleanup. */
      if (cs->request.resources)
         amdgpu_bo_list_destroy(cs->request.resources);
   }

cleanup:
   amdgpu_cs_context_cleanup(cs);
   amdgpu_get_new_ib(cs);

   ws->num_cs_flushes++;
}
예제 #14
0
/* This functions is used to draw a rectangle for the blitter module.
 *
 * If we rendered a quad, the pixels on the main diagonal
 * would be computed and stored twice, which makes the clear/copy codepaths
 * somewhat inefficient. Instead we use a rectangular point sprite. */
void r300_blitter_draw_rectangle(struct blitter_context *blitter,
                                 int x1, int y1, int x2, int y2,
                                 float depth,
                                 enum blitter_attrib_type type,
                                 const union pipe_color_union *attrib)
{
    struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
    unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
    unsigned width = x2 - x1;
    unsigned height = y2 - y1;
    unsigned vertex_size =
            type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
    unsigned dwords = 13 + vertex_size +
                      (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
    static const union pipe_color_union zeros;
    CS_LOCALS(r300);

    /* XXX workaround for a lockup in MSAA resolve on SWTCL chipsets, this
     * function most probably doesn't handle type=NONE correctly */
    if (!r300->screen->caps.has_tcl && type == UTIL_BLITTER_ATTRIB_NONE) {
        util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib);
        return;
    }

    if (r300->skip_rendering)
        return;

    if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
        r300->sprite_coord_enable = 1;

    r300_update_derived_state(r300);

    /* Mark some states we don't care about as non-dirty. */
    r300->viewport_state.dirty = FALSE;

    if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
        goto done;

    DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");

    BEGIN_CS(dwords);
    /* Set up GA. */
    OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));

    if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
        /* Set up the GA to generate texcoords. */
        OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
                   (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
        OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
        OUT_CS_32F(attrib->f[0]);
        OUT_CS_32F(attrib->f[3]);
        OUT_CS_32F(attrib->f[2]);
        OUT_CS_32F(attrib->f[1]);
    }

    /* Set up VAP controls. */
    OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
    OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(1);
    OUT_CS(0);

    /* Draw. */
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) |
           R300_VAP_VF_CNTL__PRIM_POINTS);

    OUT_CS_32F(x1 + width * 0.5f);
    OUT_CS_32F(y1 + height * 0.5f);
    OUT_CS_32F(depth);
    OUT_CS_32F(1);

    if (vertex_size == 8) {
        if (!attrib)
            attrib = &zeros;
        OUT_CS_TABLE(attrib->f, 4);
    }
    END_CS;

done:
    /* Restore the state. */
    r300_mark_atom_dirty(r300, &r300->rs_state);
    r300_mark_atom_dirty(r300, &r300->viewport_state);

    r300->sprite_coord_enable = last_sprite_coord_enable;
}
예제 #15
0
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
         * r6xx, requires at least 4 dw alignment to avoid a hw bug.
         * hawaii with old firmware needs type2 nop packet.
         * accel_working2 with value 3 indicates the new firmware.
         */
        if (cs->ws->info.chip_class <= SI ||
            (cs->ws->info.family == CHIP_HAWAII &&
             cs->ws->accel_working2 < 3)) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}
예제 #16
0
void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
                             boolean indexed, int instance_id)
{
    struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
    struct pipe_vertex_element *velem = r300->velems->velem;
    struct r300_resource *buf;
    int i;
    unsigned vertex_array_count = r300->velems->count;
    unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
    struct pipe_vertex_buffer *vb1, *vb2;
    unsigned *hw_format_size = r300->velems->format_size;
    unsigned size1, size2, offset1, offset2, stride1, stride2;
    CS_LOCALS(r300);

    BEGIN_CS(2 + packet_size + vertex_array_count * 2);
    OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
    OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));

    if (instance_id == -1) {
        /* Non-instanced arrays. This ignores instance_divisor and instance_id. */
        for (i = 0; i < vertex_array_count - 1; i += 2) {
            vb1 = &vbuf[velem[i].vertex_buffer_index];
            vb2 = &vbuf[velem[i+1].vertex_buffer_index];
            size1 = hw_format_size[i];
            size2 = hw_format_size[i+1];

            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
                   R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
            OUT_CS(vb1->buffer_offset + velem[i].src_offset   + offset * vb1->stride);
            OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
        }

        if (vertex_array_count & 1) {
            vb1 = &vbuf[velem[i].vertex_buffer_index];
            size1 = hw_format_size[i];

            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
            OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
        }

        for (i = 0; i < vertex_array_count; i++) {
            buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer);
            OUT_CS_RELOC(buf);
        }
    } else {
        /* Instanced arrays. */
        for (i = 0; i < vertex_array_count - 1; i += 2) {
            vb1 = &vbuf[velem[i].vertex_buffer_index];
            vb2 = &vbuf[velem[i+1].vertex_buffer_index];
            size1 = hw_format_size[i];
            size2 = hw_format_size[i+1];

            if (velem[i].instance_divisor) {
                stride1 = 0;
                offset1 = vb1->buffer_offset + velem[i].src_offset +
                          (instance_id / velem[i].instance_divisor) * vb1->stride;
            } else {
                stride1 = vb1->stride;
                offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
            }
            if (velem[i+1].instance_divisor) {
                stride2 = 0;
                offset2 = vb2->buffer_offset + velem[i+1].src_offset +
                          (instance_id / velem[i+1].instance_divisor) * vb2->stride;
            } else {
                stride2 = vb2->stride;
                offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride;
            }

            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) |
                   R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2));
            OUT_CS(offset1);
            OUT_CS(offset2);
        }

        if (vertex_array_count & 1) {
            vb1 = &vbuf[velem[i].vertex_buffer_index];
            size1 = hw_format_size[i];

            if (velem[i].instance_divisor) {
                stride1 = 0;
                offset1 = vb1->buffer_offset + velem[i].src_offset +
                          (instance_id / velem[i].instance_divisor) * vb1->stride;
            } else {
                stride1 = vb1->stride;
                offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
            }

            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
            OUT_CS(offset1);
        }

        for (i = 0; i < vertex_array_count; i++) {
            buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer);
            OUT_CS_RELOC(buf);
        }
    }
    END_CS;
}
예제 #17
0
void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
{
    struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state;
    struct r300_surface* surf;
    unsigned i;
    uint32_t rb3d_cctl = 0;

    CS_LOCALS(r300);

    BEGIN_CS(size);

    if (r300->screen->caps.is_r500) {
        rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE;
    }
    /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. */
    if (fb->nr_cbufs && r300->fb_multiwrite) {
        rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs);
    }
    if (r300->cmask_in_use) {
        rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE |
                     R300_RB3D_CCTL_CMASK_ENABLE;
    }

    OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl);

    /* Set up colorbuffers. */
    for (i = 0; i < fb->nr_cbufs; i++) {
        surf = r300_surface(r300_get_nonnull_cb(fb, i));

        OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset);
        OUT_CS_RELOC(surf);

        OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch);
        OUT_CS_RELOC(surf);

        if (r300->cmask_in_use && i == 0) {
            OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0);
            OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask);
            OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value);
            if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) {
                OUT_CS_REG_SEQ(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
                OUT_CS(r300->color_clear_value_ar);
                OUT_CS(r300->color_clear_value_gb);
            }
        }
    }

    /* Set up the ZB part of the CBZB clear. */
    if (r300->cbzb_clear) {
        surf = r300_surface(fb->cbufs[0]);

        OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);

        OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->cbzb_midpoint_offset);
        OUT_CS_RELOC(surf);

        OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->cbzb_pitch);
        OUT_CS_RELOC(surf);

        DBG(r300, DBG_CBZB,
            "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format,
            surf->cbzb_pitch);
    }
    /* Set up a zbuffer. */
    else if (fb->zsbuf) {
        surf = r300_surface(fb->zsbuf);

        OUT_CS_REG(R300_ZB_FORMAT, surf->format);

        OUT_CS_REG(R300_ZB_DEPTHOFFSET, surf->offset);
        OUT_CS_RELOC(surf);

        OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch);
        OUT_CS_RELOC(surf);

        if (r300->hyperz_enabled) {
            /* HiZ RAM. */
            OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
            OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz);
            /* Z Mask RAM. (compressed zbuffer) */
            OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
            OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask);
        }
    }

    END_CS;
}
예제 #18
0
void r300_emit_fb_state_pipelined(struct r300_context *r300,
                                  unsigned size, void *state)
{
    /* The sample coordinates are in the range [0,11], because
     * GB_TILE_CONFIG.SUBPIXEL is set to the 1/12 subpixel precision.
     *
     * Some sample coordinates reach to neighboring pixels and should not be used.
     * (e.g. Y=11)
     *
     * The unused samples must be set to the positions of other valid samples. */
    static unsigned sample_locs_1x[12] = {
        6,6,  6,6,  6,6,  6,6,  6,6,  6,6
    };
    static unsigned sample_locs_2x[12] = {
        3,9,  9,3,  9,3,  9,3,  9,3,  9,3
    };
    static unsigned sample_locs_4x[12] = {
        4,4,  8,8,  2,10,  10,2,  10,2,  10,2
    };
    static unsigned sample_locs_6x[12] = {
        3,1,  7,3,  11,5,  1,7,  5,9,  9,10
    };

    struct pipe_framebuffer_state* fb =
            (struct pipe_framebuffer_state*)r300->fb_state.state;
    unsigned i, num_cbufs = fb->nr_cbufs;
    unsigned mspos0, mspos1;
    CS_LOCALS(r300);

    /* If we use the multiwrite feature, the colorbuffers 2,3,4 must be
     * marked as UNUSED in the US block. */
    if (r300->fb_multiwrite) {
        num_cbufs = MIN2(num_cbufs, 1);
    }

    BEGIN_CS(size);

    /* Colorbuffer format in the US block.
     * (must be written after unpipelined regs) */
    OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
    for (i = 0; i < num_cbufs; i++) {
        OUT_CS(r300_surface(r300_get_nonnull_cb(fb, i))->format);
    }
    for (; i < 1; i++) {
        OUT_CS(R300_US_OUT_FMT_C4_8 |
               R300_C0_SEL_B | R300_C1_SEL_G |
               R300_C2_SEL_R | R300_C3_SEL_A);
    }
    for (; i < 4; i++) {
        OUT_CS(R300_US_OUT_FMT_UNUSED);
    }

    /* Set sample positions. It depends on the framebuffer sample count.
     * These are pipelined regs and as such cannot be moved to the AA state.
     */
    switch (r300->num_samples) {
    default:
        mspos0 = r300_get_mspos(0, sample_locs_1x);
        mspos1 = r300_get_mspos(1, sample_locs_1x);
        break;
    case 2:
        mspos0 = r300_get_mspos(0, sample_locs_2x);
        mspos1 = r300_get_mspos(1, sample_locs_2x);
        break;
    case 4:
        mspos0 = r300_get_mspos(0, sample_locs_4x);
        mspos1 = r300_get_mspos(1, sample_locs_4x);
        break;
    case 6:
        mspos0 = r300_get_mspos(0, sample_locs_6x);
        mspos1 = r300_get_mspos(1, sample_locs_6x);
        break;
    }

    OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
    OUT_CS(mspos0);
    OUT_CS(mspos1);
    END_CS;
}
예제 #19
0
static int radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **fence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
         * r6xx, requires at least 4 dw alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
       if (cs->next_fence) {
          radeon_fence_reference(fence, cs->next_fence);
       } else {
          radeon_fence_reference(fence, NULL);
          *fence = radeon_cs_create_fence(rcs);
       }
    }
    radeon_fence_reference(&cs->next_fence, NULL);

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    cs->ws->num_cs_flushes++;
    return 0;
}
예제 #20
0
/* This functions is used to draw a rectangle for the blitter module.
 *
 * If we rendered a quad, the pixels on the main diagonal
 * would be computed and stored twice, which makes the clear/copy codepaths
 * somewhat inefficient. Instead we use a rectangular point sprite. */
static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
                                        unsigned x1, unsigned y1,
                                        unsigned x2, unsigned y2,
                                        float depth,
                                        enum blitter_attrib_type type,
                                        const float attrib[4])
{
    struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
    unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
    unsigned width = x2 - x1;
    unsigned height = y2 - y1;
    unsigned vertex_size =
            type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
    unsigned dwords = 13 + vertex_size +
                      (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
    const float zeros[4] = {0, 0, 0, 0};
    CS_LOCALS(r300);

    if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
        r300->sprite_coord_enable = 1;

    r300_update_derived_state(r300);

    /* Mark some states we don't care about as non-dirty. */
    r300->clip_state.dirty = FALSE;
    r300->viewport_state.dirty = FALSE;

    if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0))
        goto done;

    DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");

    BEGIN_CS(dwords);
    /* Set up GA. */
    OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));

    if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
        /* Set up the GA to generate texcoords. */
        OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
                   (R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
        OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
        OUT_CS_32F(attrib[0]);
        OUT_CS_32F(attrib[3]);
        OUT_CS_32F(attrib[2]);
        OUT_CS_32F(attrib[1]);
    }

    /* Set up VAP controls. */
    OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
    OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(1);
    OUT_CS(0);

    /* Draw. */
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (1 << 16) |
           R300_VAP_VF_CNTL__PRIM_POINTS);

    OUT_CS_32F(x1 + width * 0.5f);
    OUT_CS_32F(y1 + height * 0.5f);
    OUT_CS_32F(depth);
    OUT_CS_32F(1);

    if (vertex_size == 8) {
        if (!attrib)
            attrib = zeros;
        OUT_CS_TABLE(attrib, 4);
    }
    END_CS;

done:
    /* Restore the state. */
    r300->clip_state.dirty = TRUE;
    r300->rs_state.dirty = TRUE;
    r300->viewport_state.dirty = TRUE;

    r300->sprite_coord_enable = last_sprite_coord_enable;
}
예제 #21
0
static void r300_draw_arrays_immediate(struct r300_context *r300,
                                       const struct pipe_draw_info *info)
{
    struct pipe_vertex_element* velem;
    struct pipe_vertex_buffer* vbuf;
    unsigned vertex_element_count = r300->velems->count;
    unsigned i, v, vbi;

    /* Size of the vertex, in dwords. */
    unsigned vertex_size = r300->velems->vertex_size_dwords;

    /* The number of dwords for this draw operation. */
    unsigned dwords = 4 + info->count * vertex_size;

    /* Size of the vertex element, in dwords. */
    unsigned size[PIPE_MAX_ATTRIBS];

    /* Stride to the same attrib in the next vertex in the vertex buffer,
     * in dwords. */
    unsigned stride[PIPE_MAX_ATTRIBS];

    /* Mapped vertex buffers. */
    uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
    uint32_t* mapelem[PIPE_MAX_ATTRIBS];

    CS_LOCALS(r300);

    if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
        return;

    /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        velem = &r300->velems->velem[i];
        size[i] = r300->velems->format_size[i] / 4;
        vbi = velem->vertex_buffer_index;
        vbuf = &r300->vertex_buffer[vbi];
        stride[i] = vbuf->stride / 4;

        /* Map the buffer. */
        if (!map[vbi]) {
            map[vbi] = (uint32_t*)r300->rws->buffer_map(
                r300_resource(vbuf->buffer)->cs_buf,
                r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED);
            map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start;
        }
        mapelem[i] = map[vbi] + (velem->src_offset / 4);
    }

    r300_emit_draw_init(r300, info->mode, info->count-1);

    BEGIN_CS(dwords);
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, info->count * vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (info->count << 16) |
            r300_translate_primitive(info->mode));

    /* Emit vertices. */
    for (v = 0; v < info->count; v++) {
        for (i = 0; i < vertex_element_count; i++) {
            OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]);
        }
    }
    END_CS;
}
예제 #22
0
static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
                                            unsigned mode,
                                            unsigned start,
                                            unsigned count)
{
    struct pipe_vertex_element* velem;
    struct pipe_vertex_buffer* vbuf;
    unsigned vertex_element_count = r300->velems->count;
    unsigned i, v, vbi;

    /* Size of the vertex, in dwords. */
    unsigned vertex_size = r300->velems->vertex_size_dwords;

    /* The number of dwords for this draw operation. */
    unsigned dwords = 9 + count * vertex_size;

    /* Size of the vertex element, in dwords. */
    unsigned size[PIPE_MAX_ATTRIBS];

    /* Stride to the same attrib in the next vertex in the vertex buffer,
     * in dwords. */
    unsigned stride[PIPE_MAX_ATTRIBS];

    /* Mapped vertex buffers. */
    uint32_t* map[PIPE_MAX_ATTRIBS];
    uint32_t* mapelem[PIPE_MAX_ATTRIBS];
    struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0};

    CS_LOCALS(r300);

    if (!r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0))
        return;

    /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        velem = &r300->velems->velem[i];
        size[i] = r300->velems->hw_format_size[i] / 4;
        vbi = velem->vertex_buffer_index;
        vbuf = &r300->vertex_buffer[vbi];
        stride[i] = vbuf->stride / 4;

        /* Map the buffer. */
        if (!transfer[vbi]) {
            map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
                                                  vbuf->buffer,
                                                  PIPE_TRANSFER_READ,
						  &transfer[vbi]);
            map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start;
        }
        mapelem[i] = map[vbi] + (velem->src_offset / 4);
    }

    BEGIN_CS(dwords);
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, mode));
    OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size);
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(count - 1);
    OUT_CS(0);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
            r300_translate_primitive(mode));

    /* Emit vertices. */
    for (v = 0; v < count; v++) {
        for (i = 0; i < vertex_element_count; i++) {
            OUT_CS_TABLE(&mapelem[i][stride[i] * v], size[i]);
        }
    }
    END_CS;

    /* Unmap buffers. */
    for (i = 0; i < vertex_element_count; i++) {
        vbi = r300->velems->velem[i].vertex_buffer_index;

        if (transfer[vbi]) {
            vbuf = &r300->vertex_buffer[vbi];
            pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]);
            transfer[vbi] = NULL;
        }
    }
}
예제 #23
0
static void r300_draw_elements_immediate(struct r300_context *r300,
                                         const struct pipe_draw_info *info)
{
    const uint8_t *ptr1;
    const uint16_t *ptr2;
    const uint32_t *ptr4;
    unsigned index_size = r300->index_buffer.index_size;
    unsigned i, count_dwords = index_size == 4 ? info->count :
                                                 (info->count + 1) / 2;
    CS_LOCALS(r300);

    /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */
    if (!r300_prepare_for_rendering(r300,
            PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_VARRAYS |
            PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1))
        return;

    r300_emit_draw_init(r300, info->mode, info->max_index);

    BEGIN_CS(2 + count_dwords);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count_dwords);

    switch (index_size) {
    case 1:
        ptr1 = (uint8_t*)r300->index_buffer.user_buffer;
        ptr1 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr1[i+1] + info->index_bias) << 16) |
                        (ptr1[i]   + info->index_bias));

            if (info->count & 1)
                OUT_CS(ptr1[i] + info->index_bias);
        } else {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr1[i+1]) << 16) |
                        (ptr1[i]  ));

            if (info->count & 1)
                OUT_CS(ptr1[i]);
        }
        break;

    case 2:
        ptr2 = (uint16_t*)r300->index_buffer.user_buffer;
        ptr2 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count-1; i += 2)
                OUT_CS(((ptr2[i+1] + info->index_bias) << 16) |
                        (ptr2[i]   + info->index_bias));

            if (info->count & 1)
                OUT_CS(ptr2[i] + info->index_bias);
        } else {
            OUT_CS_TABLE(ptr2, count_dwords);
        }
        break;

    case 4:
        ptr4 = (uint32_t*)r300->index_buffer.user_buffer;
        ptr4 += info->start;

        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (info->count << 16) |
               R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
               r300_translate_primitive(info->mode));

        if (info->index_bias && !r300->screen->caps.is_r500) {
            for (i = 0; i < info->count; i++)
                OUT_CS(ptr4[i] + info->index_bias);
        } else {
            OUT_CS_TABLE(ptr4, count_dwords);
        }
        break;
    }
    END_CS;
}
예제 #24
0
static void r300_emit_draw_elements(struct r300_context *r300,
                                    struct pipe_resource* indexBuffer,
                                    unsigned indexSize,
                                    unsigned minIndex,
                                    unsigned maxIndex,
                                    unsigned mode,
                                    unsigned start,
                                    unsigned count)
{
    uint32_t count_dwords;
    uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
    boolean alt_num_verts = count > 65535;
    CS_LOCALS(r300);

    if (count >= (1 << 24)) {
        fprintf(stderr, "r300: Got a huge number of vertices: %i, "
                "refusing to render.\n", count);
        return;
    }

    maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);

    DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
        count, minIndex, maxIndex);

    BEGIN_CS(13 + (alt_num_verts ? 2 : 0));
    if (alt_num_verts) {
        OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
    }
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
            r300_provoking_vertex_fixes(r300, mode));
    OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
    OUT_CS(maxIndex);
    OUT_CS(minIndex);
    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
    if (indexSize == 4) {
        count_dwords = count;
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
               R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
               r300_translate_primitive(mode) |
               (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
    } else {
        count_dwords = (count + 1) / 2;
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
               r300_translate_primitive(mode) |
               (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
    }

    /* INDX_BUFFER is a truly special packet3.
     * Unlike most other packet3, where the offset is after the count,
     * the order is reversed, so the relocation ends up carrying the
     * size of the indexbuf instead of the offset.
     */
    OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
           (0 << R300_INDX_BUFFER_SKIP_SHIFT));
    OUT_CS(offset_dwords << 2);
    OUT_CS_BUF_RELOC(indexBuffer, count_dwords,
                     r300_buffer(indexBuffer)->domain, 0);

    END_CS;
}
예제 #25
0
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
         * r6xx, requires at least 4 dw alignment to avoid a hw bug.
         */
        if (flags & RADEON_FLUSH_COMPUTE) {
            if (cs->ws->info.chip_class <= SI) {
                while (rcs->cdw & 7)
                    OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
            } else {
                while (rcs->cdw & 7)
                    OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
            }
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        }
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            radeon_drm_ws_queue_cs(cs->ws, cs);
        } else {
            pipe_mutex_lock(cs->ws->cs_stack_lock);
            if (cs->ws->thread) {
                while (p_atomic_read(&cs->ws->ncs)) {
                    pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock);
                }
            }
            pipe_mutex_unlock(cs->ws->cs_stack_lock);
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
예제 #26
0
static void r300_render_draw_elements(struct vbuf_render* render,
                                      const ushort* indices,
                                      uint count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    int i;
    unsigned end_cs_dwords;
    unsigned max_index = (r300->draw_vbo_size - r300->draw_vbo_offset) /
                         (r300render->r300->vertex_info.size * 4) - 1;
    unsigned short_count;
    unsigned free_dwords;

    CS_LOCALS(r300);
    DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count);

    if (r300->draw_first_emitted) {
        if (!r300_prepare_for_rendering(r300,
                PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED,
                NULL, 256, 0, 0, -1))
            return;
    } else {
        if (!r300_emit_states(r300,
                PREP_EMIT_STATES | PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED,
                NULL, 0, 0, -1))
            return;
    }

    /* Below we manage the CS space manually because there may be more
     * indices than it can fit in CS. */

    end_cs_dwords = r300_get_num_cs_end_dwords(r300);

    while (count) {
        free_dwords = RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw;

        short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2);

        BEGIN_CS(6 + (short_count+1)/2);
        OUT_CS_REG(R300_GA_COLOR_CONTROL,
                r300_provoking_vertex_fixes(r300, r300render->prim));
        OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index);
        OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (short_count+1)/2);
        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (short_count << 16) |
               r300render->hwprim);
        for (i = 0; i < short_count-1; i += 2) {
            OUT_CS(indices[i+1] << 16 | indices[i]);
        }
        if (short_count % 2) {
            OUT_CS(indices[short_count-1]);
        }
        END_CS;

        /* OK now subtract the emitted indices and see if we need to emit
         * another draw packet. */
        indices += short_count;
        count -= short_count;

        if (count) {
            if (!r300_prepare_for_rendering(r300,
                    PREP_EMIT_VARRAYS_SWTCL | PREP_INDEXED,
                    NULL, 256, 0, 0, -1))
                return;

            end_cs_dwords = r300_get_num_cs_end_dwords(r300);
        }
    }

    r300->draw_first_emitted = TRUE;
}