static void upload_invarient_state( struct brw_context *brw ) { { /* 0x61040000 Pipeline Select */ /* PipelineSelect : 0 */ struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); ps.header.opcode = CMD_PIPELINE_SELECT(brw); ps.header.pipeline_select = 0; BRW_BATCH_STRUCT(brw, &ps); } { struct brw_global_depth_offset_clamp gdo; memset(&gdo, 0, sizeof(gdo)); /* Disable depth offset clamping. */ gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; gdo.header.length = sizeof(gdo)/4 - 2; gdo.depth_offset_clamp = 0.0; BRW_BATCH_STRUCT(brw, &gdo); } /* 0x61020000 State Instruction Pointer */ { struct brw_system_instruction_pointer sip; memset(&sip, 0, sizeof(sip)); sip.header.opcode = CMD_STATE_INSN_POINTER; sip.header.length = 0; sip.bits0.pad = 0; sip.bits0.system_instruction_pointer = 0; BRW_BATCH_STRUCT(brw, &sip); } { struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); vfs.opcode = CMD_VF_STATISTICS(brw); if (INTEL_DEBUG & DEBUG_STATS) vfs.statistics_enable = 1; BRW_BATCH_STRUCT(brw, &vfs); } }
void brw_upload_urb_fence(struct brw_context *brw) { struct brw_urb_fence uf; memset(&uf, 0, sizeof(uf)); uf.header.opcode = CMD_URB_FENCE; uf.header.length = sizeof(uf)/4-2; uf.header.vs_realloc = 1; uf.header.gs_realloc = 1; uf.header.clp_realloc = 1; uf.header.sf_realloc = 1; uf.header.vfe_realloc = 1; uf.header.cs_realloc = 1; /* The ordering below is correct, not the layout in the * instruction. * * There are 256/384 urb reg pairs in total. */ uf.bits0.vs_fence = brw->urb.gs_start; uf.bits0.gs_fence = brw->urb.clip_start; uf.bits0.clp_fence = brw->urb.sf_start; uf.bits1.sf_fence = brw->urb.cs_start; uf.bits1.cs_fence = brw->urb.size; /* erratum: URB_FENCE must not cross a 64byte cacheline */ if ((brw->batch.used & 15) > 12) { int pad = 16 - (brw->batch.used & 15); do brw->batch.map[brw->batch.used++] = MI_NOOP; while (--pad); } BRW_BATCH_STRUCT(brw, &uf); }
void brw_upload_urb_fence(struct brw_context *brw) { struct brw_urb_fence uf; memset(&uf, 0, sizeof(uf)); uf.header.opcode = CMD_URB_FENCE; uf.header.length = sizeof(uf)/4-2; uf.header.vs_realloc = 1; uf.header.gs_realloc = 1; uf.header.clp_realloc = 1; uf.header.sf_realloc = 1; uf.header.vfe_realloc = 1; uf.header.cs_realloc = 1; /* The ordering below is correct, not the layout in the * instruction. * * There are 256/384 urb reg pairs in total. */ uf.bits0.vs_fence = brw->urb.gs_start; uf.bits0.gs_fence = brw->urb.clip_start; uf.bits0.clp_fence = brw->urb.sf_start; uf.bits1.sf_fence = brw->urb.cs_start; uf.bits1.cs_fence = brw->urb.size; BRW_BATCH_STRUCT(brw, &uf); }
/* Emit an MI_FLUSH carrying the caller-supplied flag bits. */
void brw_do_flush( struct brw_context *brw, GLuint flags )
{
   struct brw_mi_flush mi_flush;

   memset(&mi_flush, 0, sizeof(mi_flush));
   mi_flush.opcode = CMD_MI_FLUSH;
   mi_flush.flags = flags;

   BRW_BATCH_STRUCT(brw, &mi_flush);
}
/* Would emit a PIPE_CONTROL flushing the instruction state cache.
 *
 * NOTE(review): the bare `return;` below short-circuits the whole
 * function, so the PIPE_CONTROL packet is never emitted and everything
 * after it is dead code.  This looks like a deliberate disable (the
 * packet setup was left in place) -- confirm intent before deleting or
 * re-enabling.
 */
static void upload_pipe_control(struct brw_context *brw)
{
   struct brw_pipe_control pc;

   return;  /* function disabled: nothing below executes */

   memset(&pc, 0, sizeof(pc));

   pc.header.opcode = CMD_PIPE_CONTROL;
   pc.header.length = sizeof(pc)/4 - 2;
   pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE;

   pc.header.instruction_state_cache_flush_enable = 1;

   pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL;

   BRW_BATCH_STRUCT(brw, &pc);
}
/*********************************************************************** * Drawing rectangle -- Need for AUB file only. */ static void upload_drawing_rect(struct brw_context *brw) { struct intel_context *intel = &brw->intel; __DRIdrawablePrivate *dPriv = intel->driDrawable; struct brw_drawrect bdr; int x1, y1; int x2, y2; /* If there is a single cliprect, set it here. Otherwise iterate * over them in brw_draw_prim(). */ if (brw->intel.numClipRects > 1) return; x1 = brw->intel.pClipRects[0].x1; y1 = brw->intel.pClipRects[0].y1; x2 = brw->intel.pClipRects[0].x2; y2 = brw->intel.pClipRects[0].y2; if (x1 < 0) x1 = 0; if (y1 < 0) y1 = 0; if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width; if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height; memset(&bdr, 0, sizeof(bdr)); bdr.header.opcode = CMD_DRAW_RECT; bdr.header.length = sizeof(bdr)/4 - 2; bdr.xmin = x1; bdr.ymin = y1; bdr.xmax = x2; bdr.ymax = y2; bdr.xorg = dPriv->x; bdr.yorg = dPriv->y; /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted * uncached in brw_draw.c: */ BRW_BATCH_STRUCT(brw, &bdr); }
/* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void upload_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; GLuint sz = brw->curbe.total_size; GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; /* Update our own dependency flags. This works because this * function will also be called whenever fp or vp changes. */ brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); brw->curbe.tracked_state.dirty.mesa |= vp->param_state; brw->curbe.tracked_state.dirty.mesa |= fp->param_state; if (sz == 0) { struct brw_constant_buffer cb; cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 0; cb.bits0.buffer_length = 0; cb.bits0.buffer_address = 0; BRW_BATCH_STRUCT(brw, &cb); if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } return; } buf = (GLfloat *)malloc(bufsz); memset(buf, 0, bufsz); if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = brw->wm.prog_data->param[i][0]; } /* The clipplanes are actually delivered to both CLIP and VS units. * VS uses them to calculate the outcode bitmasks. 
*/ if (brw->curbe.clip_size) { GLuint offset = brw->curbe.clip_start * 16; GLuint j; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0] = fixed_plane[i][0]; buf[offset + i * 4 + 1] = fixed_plane[i][1]; buf[offset + i * 4 + 2] = fixed_plane[i][2]; buf[offset + i * 4 + 3] = fixed_plane[i][3]; } /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to * clip-space: */ assert(MAX_CLIP_PLANES == 6); for (j = 0; j < MAX_CLIP_PLANES; j++) { if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) { buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0]; buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1]; buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2]; buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3]; i++; } } } if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = vp->program.Base.Parameters->NumParameters; _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); for (i = 0; i < nr; i++) { buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; } } if (0) { for (i = 0; i < sz*16; i+=4) _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); /* return; */ } else { if (brw->curbe.last_buf) free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (!brw_pool_alloc(pool, bufsz, 6, &brw->curbe.gs_offset)) { _mesa_printf("out of GS memory for curbe\n"); assert(0); return; } /* Copy data to the buffer: */ bmBufferSubDataAUB(&brw->intel, pool->buffer, brw->curbe.gs_offset, bufsz, buf, DW_CONSTANT_BUFFER, 0); } /* TODO: only emit the constant_buffer packet when necessary, ie: - contents have changed - offset has changed - hw requirements due to other packets emitted. */ { struct brw_constant_buffer cb; memset(&cb, 0, sizeof(cb)); cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 1; cb.bits0.buffer_length = sz - 1; cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ /* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */ BRW_BATCH_STRUCT(brw, &cb); /* intel_batchbuffer_align(brw->intel.batch, 64, 0); */ } }
static int upload_invarient_state( struct brw_context *brw ) { { /* 0x61040000 Pipeline Select */ /* PipelineSelect : 0 */ struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ps.header.opcode = CMD_PIPELINE_SELECT_GM45; else ps.header.opcode = CMD_PIPELINE_SELECT_965; ps.header.pipeline_select = 0; BRW_BATCH_STRUCT(brw, &ps); } { struct brw_global_depth_offset_clamp gdo; memset(&gdo, 0, sizeof(gdo)); /* Disable depth offset clamping. */ gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; gdo.header.length = sizeof(gdo)/4 - 2; gdo.depth_offset_clamp = 0.0; BRW_BATCH_STRUCT(brw, &gdo); } /* 0x61020000 State Instruction Pointer */ { struct brw_system_instruction_pointer sip; memset(&sip, 0, sizeof(sip)); sip.header.opcode = CMD_STATE_INSN_POINTER; sip.header.length = 0; sip.bits0.pad = 0; sip.bits0.system_instruction_pointer = 0; BRW_BATCH_STRUCT(brw, &sip); } /* VF Statistics */ { struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) vfs.opcode = CMD_VF_STATISTICS_GM45; else vfs.opcode = CMD_VF_STATISTICS_965; if (BRW_DEBUG & DEBUG_STATS) vfs.statistics_enable = 1; BRW_BATCH_STRUCT(brw, &vfs); } if (!BRW_IS_965(brw)) { struct brw_aa_line_parameters balp; /* use legacy aa line coverage computation */ memset(&balp, 0, sizeof(balp)); balp.header.opcode = CMD_AA_LINE_PARAMETERS; balp.header.length = sizeof(balp) / 4 - 2; BRW_BATCH_STRUCT(brw, &balp); } { struct brw_polygon_stipple_offset bpso; /* This is invarient state in gallium: */ memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; bpso.bits0.y_offset = 0; bpso.bits0.x_offset = 0; BRW_BATCH_STRUCT(brw, &bpso); } return 0; }
/* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void upload_constant_buffer(struct brw_context *brw) { struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; unsigned sz = brw->curbe.total_size; unsigned bufsz = sz * sizeof(float); float *buf; unsigned i; if (sz == 0) { struct brw_constant_buffer cb; cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 0; cb.bits0.buffer_length = 0; cb.bits0.buffer_address = 0; BRW_BATCH_STRUCT(brw, &cb); if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } return; } buf = (float *)malloc(bufsz); memset(buf, 0, bufsz); if (brw->curbe.wm_size) { unsigned offset = brw->curbe.wm_start * 16; /* First the constant buffer constants: */ /* Then any internally generated constants: */ for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) buf[offset + i] = brw->wm.prog_data->internal_const[i]; assert(brw->wm.prog_data->max_const == brw->wm.prog_data->nr_internal_consts); } /* The clipplanes are actually delivered to both CLIP and VS units. * VS uses them to calculate the outcode bitmasks. 
*/ if (brw->curbe.clip_size) { unsigned offset = brw->curbe.clip_start * 16; unsigned j; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0] = fixed_plane[i][0]; buf[offset + i * 4 + 1] = fixed_plane[i][1]; buf[offset + i * 4 + 2] = fixed_plane[i][2]; buf[offset + i * 4 + 3] = fixed_plane[i][3]; } /* Clip planes: BRW_NEW_CLIP: */ for (j = 0; j < brw->attribs.Clip.nr; j++) { buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; i++; } } if (brw->curbe.vs_size) { unsigned offset = brw->curbe.vs_start * 16; /*unsigned nr = vp->max_const;*/ const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; struct pipe_winsys *ws = brw->pipe.winsys; /* FIXME: buffer size is num_consts + num_immediates */ if (brw->vs.prog_data->num_consts) { /* map the vertex constant buffer and copy to curbe: */ void *data = ws->buffer_map(ws, cbuffer->buffer, 0); /* FIXME: this is wrong. the cbuffer->buffer->size currently * represents size of consts + immediates. so if we'll * have both we'll copy over the end of the buffer * with the subsequent memcpy */ memcpy(&buf[offset], data, cbuffer->buffer->size); ws->buffer_unmap(ws, cbuffer->buffer); offset += cbuffer->buffer->size; } /*immediates*/ if (brw->vs.prog_data->num_imm) { memcpy(&buf[offset], brw->vs.prog_data->imm_buf, brw->vs.prog_data->num_imm * 4 * sizeof(float)); } } if (1) { for (i = 0; i < sz; i+=4) debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); /* return; */ } else { if (brw->curbe.last_buf) free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (!brw_pool_alloc(pool, bufsz, 1 << 6, &brw->curbe.gs_offset)) { debug_printf("out of GS memory for curbe\n"); assert(0); return; } /* Copy data to the buffer: */ brw->winsys->buffer_subdata_typed(brw->winsys, pool->buffer, brw->curbe.gs_offset, bufsz, buf, BRW_CONSTANT_BUFFER ); } /* TODO: only emit the constant_buffer packet when necessary, ie: - contents have changed - offset has changed - hw requirements due to other packets emitted. */ { struct brw_constant_buffer cb; memset(&cb, 0, sizeof(cb)); cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 1; cb.bits0.buffer_length = sz - 1; cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ BRW_BATCH_STRUCT(brw, &cb); } }