/* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void upload_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; GLuint sz = brw->curbe.total_size; GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; /* Update our own dependency flags. This works because this * function will also be called whenever fp or vp changes. */ brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); brw->curbe.tracked_state.dirty.mesa |= vp->param_state; brw->curbe.tracked_state.dirty.mesa |= fp->param_state; if (sz == 0) { struct brw_constant_buffer cb; cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 0; cb.bits0.buffer_length = 0; cb.bits0.buffer_address = 0; BRW_BATCH_STRUCT(brw, &cb); if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } return; } buf = (GLfloat *)malloc(bufsz); memset(buf, 0, bufsz); if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = brw->wm.prog_data->param[i][0]; } /* The clipplanes are actually delivered to both CLIP and VS units. * VS uses them to calculate the outcode bitmasks. */ if (brw->curbe.clip_size) { GLuint offset = brw->curbe.clip_start * 16; GLuint j; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0] = fixed_plane[i][0]; buf[offset + i * 4 + 1] = fixed_plane[i][1]; buf[offset + i * 4 + 2] = fixed_plane[i][2]; buf[offset + i * 4 + 3] = fixed_plane[i][3]; } /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to * clip-space: */ assert(MAX_CLIP_PLANES == 6); for (j = 0; j < MAX_CLIP_PLANES; j++) { if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) { buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0]; buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1]; buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2]; buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3]; i++; } } } if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = vp->program.Base.Parameters->NumParameters; _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); for (i = 0; i < nr; i++) { buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; } } if (0) { for (i = 0; i < sz*16; i+=4) _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); /* return; */ } else { if (brw->curbe.last_buf) free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (!brw_pool_alloc(pool, bufsz, 6, &brw->curbe.gs_offset)) { _mesa_printf("out of GS memory for curbe\n"); assert(0); return; } /* Copy data to the buffer: */ bmBufferSubDataAUB(&brw->intel, pool->buffer, brw->curbe.gs_offset, bufsz, buf, DW_CONSTANT_BUFFER, 0); } /* TODO: only emit the constant_buffer packet when necessary, ie: - contents have changed - offset has changed - hw requirements due to other packets emitted. */ { struct brw_constant_buffer cb; memset(&cb, 0, sizeof(cb)); cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 1; cb.bits0.buffer_length = sz - 1; cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ /* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */ BRW_BATCH_STRUCT(brw, &cb); /* intel_batchbuffer_align(brw->intel.batch, 64, 0); */ } }
/* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void upload_constant_buffer(struct brw_context *brw) { struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; unsigned sz = brw->curbe.total_size; unsigned bufsz = sz * sizeof(float); float *buf; unsigned i; if (sz == 0) { struct brw_constant_buffer cb; cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 0; cb.bits0.buffer_length = 0; cb.bits0.buffer_address = 0; BRW_BATCH_STRUCT(brw, &cb); if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } return; } buf = (float *)malloc(bufsz); memset(buf, 0, bufsz); if (brw->curbe.wm_size) { unsigned offset = brw->curbe.wm_start * 16; /* First the constant buffer constants: */ /* Then any internally generated constants: */ for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) buf[offset + i] = brw->wm.prog_data->internal_const[i]; assert(brw->wm.prog_data->max_const == brw->wm.prog_data->nr_internal_consts); } /* The clipplanes are actually delivered to both CLIP and VS units. * VS uses them to calculate the outcode bitmasks. */ if (brw->curbe.clip_size) { unsigned offset = brw->curbe.clip_start * 16; unsigned j; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0] = fixed_plane[i][0]; buf[offset + i * 4 + 1] = fixed_plane[i][1]; buf[offset + i * 4 + 2] = fixed_plane[i][2]; buf[offset + i * 4 + 3] = fixed_plane[i][3]; } /* Clip planes: BRW_NEW_CLIP: */ for (j = 0; j < brw->attribs.Clip.nr; j++) { buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; i++; } } if (brw->curbe.vs_size) { unsigned offset = brw->curbe.vs_start * 16; /*unsigned nr = vp->max_const;*/ const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; struct pipe_winsys *ws = brw->pipe.winsys; /* FIXME: buffer size is num_consts + num_immediates */ if (brw->vs.prog_data->num_consts) { /* map the vertex constant buffer and copy to curbe: */ void *data = ws->buffer_map(ws, cbuffer->buffer, 0); /* FIXME: this is wrong. the cbuffer->buffer->size currently * represents size of consts + immediates. so if we'll * have both we'll copy over the end of the buffer * with the subsequent memcpy */ memcpy(&buf[offset], data, cbuffer->buffer->size); ws->buffer_unmap(ws, cbuffer->buffer); offset += cbuffer->buffer->size; } /*immediates*/ if (brw->vs.prog_data->num_imm) { memcpy(&buf[offset], brw->vs.prog_data->imm_buf, brw->vs.prog_data->num_imm * 4 * sizeof(float)); } } if (1) { for (i = 0; i < sz; i+=4) debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); /* return; */ } else { if (brw->curbe.last_buf) free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (!brw_pool_alloc(pool, bufsz, 1 << 6, &brw->curbe.gs_offset)) { debug_printf("out of GS memory for curbe\n"); assert(0); return; } /* Copy data to the buffer: */ brw->winsys->buffer_subdata_typed(brw->winsys, pool->buffer, brw->curbe.gs_offset, bufsz, buf, BRW_CONSTANT_BUFFER ); } /* TODO: only emit the constant_buffer packet when necessary, ie: - contents have changed - offset has changed - hw requirements due to other packets emitted. */ { struct brw_constant_buffer cb; memset(&cb, 0, sizeof(cb)); cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 1; cb.bits0.buffer_length = sz - 1; cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ BRW_BATCH_STRUCT(brw, &cb); } }