/**
 * Refresh the state needed to run fragment programs in the software
 * rasterizer.  When fragment programs are enabled, the current program's
 * parameter list is reloaded from the current GL state; otherwise this
 * is a no-op.
 *
 * \param ctx  the GL context
 */
static void
_swrast_update_fragment_program( GLcontext *ctx )
{
   struct fragment_program *program;

   if (!ctx->FragmentProgram._Enabled)
      return;

   program = ctx->FragmentProgram.Current;
   _mesa_load_state_parameters(ctx, program->Parameters);
}
/**
 * Refresh the state needed to run fragment programs in the software
 * rasterizer: if a fragment program is in use, reload its parameter list
 * from the current GL state.
 *
 * \param ctx       the GL context
 * \param newState  bitmask of changed state; not consulted here
 */
static void
_swrast_update_fragment_program(struct gl_context *ctx, GLbitfield newState)
{
   if (_swrast_use_fragment_program(ctx)) {
      _mesa_load_state_parameters(ctx,
                                  ctx->FragmentProgram._Current->Base.Parameters);
   }
}
/** * Pass the given program parameters to the graphics pipe as a * constant buffer. * \param shader_type either PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT */ void st_upload_constants( struct st_context *st, struct gl_program_parameter_list *params, unsigned shader_type) { assert(shader_type == PIPE_SHADER_VERTEX || shader_type == PIPE_SHADER_FRAGMENT || shader_type == PIPE_SHADER_GEOMETRY); /* update constants */ if (params && params->NumParameters) { struct pipe_constant_buffer cb; const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4; /* Update the constants which come from fixed-function state, such as * transformation matrices, fog factors, etc. The rest of the values in * the parameters list are explicitly set by the user with glUniform, * glProgramParameter(), etc. */ _mesa_load_state_parameters(st->ctx, params); /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. * Let's use a user buffer to avoid an unnecessary copy. */ if (st->constbuf_uploader) { cb.buffer = NULL; cb.user_buffer = NULL; u_upload_data(st->constbuf_uploader, 0, paramBytes, params->ParameterValues, &cb.buffer_offset, &cb.buffer); u_upload_unmap(st->constbuf_uploader); } else { cb.buffer = NULL; cb.user_buffer = params->ParameterValues; cb.buffer_offset = 0; } cb.buffer_size = paramBytes; if (ST_DEBUG & DEBUG_CONSTANTS) { debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", __FUNCTION__, shader_type, params->NumParameters, params->StateFlags); _mesa_print_parameter_list(params); } cso_set_constant_buffer(st->cso_context, shader_type, 0, &cb); pipe_resource_reference(&cb.buffer, NULL); st->state.constants[shader_type].ptr = params->ParameterValues; st->state.constants[shader_type].size = paramBytes; } else if (st->state.constants[shader_type].ptr) { /* Unbind. 
*/ st->state.constants[shader_type].ptr = NULL; st->state.constants[shader_type].size = 0; cso_set_constant_buffer(st->cso_context, shader_type, 0, NULL); } }
/**
 * Upload the pull constants of a vec4 shader stage into a newly allocated
 * buffer object and create the constant surface that points at it.
 *
 * When the program has no pull parameters, any previously held constant
 * BO is released, the surface offset is cleared and the function returns.
 *
 * \param brw               driver context
 * \param brw_new_constbuf  dirty bit(s) OR'ed into brw->state.dirty.brw
 *                          whenever the constant buffer binding changes
 * \param prog              program whose parameter list is refreshed
 * \param stage_state       per-stage state holding const_bo / surf_offset
 * \param prog_data         compiled program data (pull_param table)
 */
void
brw_upload_vec4_pull_constants(struct brw_context *brw,
                               GLbitfield brw_new_constbuf,
                               const struct gl_program *prog,
                               struct brw_stage_state *stage_state,
                               const struct brw_vec4_prog_data *prog_data)
{
   int i;
   uint32_t surf_index = prog_data->base.binding_table.pull_constants_start;

   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   _mesa_load_state_parameters(&brw->ctx, prog->Parameters);

   if (!prog_data->nr_pull_params) {
      /* No pull constants: drop a stale BO, if one is still held. */
      if (stage_state->const_bo) {
         drm_intel_bo_unreference(stage_state->const_bo);
         stage_state->const_bo = NULL;
         stage_state->surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= brw_new_constbuf;
      }
      return;
   }

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo_unreference(stage_state->const_bo);
   /* Each pull parameter occupies 4 bytes in the buffer. */
   uint32_t size = prog_data->nr_pull_params * 4;
   /* NOTE(review): the allocation result is used below without a NULL
    * check -- confirm whether drm_intel_bo_alloc can fail here.
    */
   stage_state->const_bo = drm_intel_bo_alloc(brw->bufmgr, "vec4_const_buffer",
                                              size, 64);

   drm_intel_gem_bo_map_gtt(stage_state->const_bo);

   /* Copy every pull parameter's current value into the mapped BO. */
   for (i = 0; i < prog_data->nr_pull_params; i++) {
      memcpy(stage_state->const_bo->virtual + i * 4,
             prog_data->pull_param[i],
             4);
   }

   /* Debug dump of the buffer contents, compiled out. */
   if (0) {
      for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
         float *row = (float *)stage_state->const_bo->virtual + i * 4;
         printf("const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
                i, row[0], row[1], row[2], row[3]);
      }
   }

   drm_intel_gem_bo_unmap_gtt(stage_state->const_bo);

   brw_create_constant_surface(brw, stage_state->const_bo, 0, size,
                               &stage_state->surf_offset[surf_index],
                               false);

   brw->state.dirty.brw |= brw_new_constbuf;
}
/**
 * Reload a vertex program's parameters from GL state and copy them into
 * the two vpp hardware state atoms.
 *
 * The first 96 parameter vectors go into vpp[0]; any further ones
 * continue in vpp[1].  The atoms' command sizes and vector counts are
 * trimmed to the number of parameters actually written.
 *
 * \param ctx  the GL context
 * \param vp   the r200 vertex program (its Parameters must be non-NULL)
 * \return GL_FALSE if the program has more than R200_VSF_MAX_PARAM
 *         parameters, GL_TRUE otherwise.
 */
static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
{
   r200ContextPtr rmesa = R200_CONTEXT( ctx );
   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
   struct gl_program_parameter_list *paramList;
   GLfloat *out = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
   drm_radeon_cmd_header_t hdr;
   GLuint first_count, second_count;
   int idx;

   R200_STATECHANGE( rmesa, vpp[0] );
   R200_STATECHANGE( rmesa, vpp[1] );
   assert(mesa_vp->Base.Parameters);
   _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
   paramList = mesa_vp->Base.Parameters;

   if (paramList->NumParameters > R200_VSF_MAX_PARAM) {
      fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
      return GL_FALSE;
   }

   for (idx = 0; idx < paramList->NumParameters; idx++) {
      switch (paramList->Parameters[idx].Type) {
      case PROGRAM_STATE_VAR:
      case PROGRAM_NAMED_PARAM:
      case PROGRAM_CONSTANT: {
         const GLfloat *value = paramList->ParameterValues[idx];

         out[0] = value[0];
         out[1] = value[1];
         out[2] = value[2];
         out[3] = value[3];
         out += 4;
         break;
      }
      default:
         _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
         break;
      }

      /* After the 96th vector, continue writing into the second atom. */
      if (idx == 95)
         out = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
   }

   /* Split the parameter count between the two atoms. */
   if (paramList->NumParameters > 96) {
      first_count = 96;
      second_count = paramList->NumParameters - 96;
   }
   else {
      first_count = paramList->NumParameters;
      second_count = 0;
   }

   /* hack up the cmd_size so not the whole state atom is emitted always. */
   rmesa->hw.vpp[0].cmd_size = 1 + 4 * first_count;
   hdr.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
   hdr.veclinear.count = first_count;
   rmesa->hw.vpp[0].cmd[VPP_CMD_0] = hdr.i;

   if (second_count > 0) {
      rmesa->hw.vpp[1].cmd_size = 1 + 4 * second_count;
      hdr.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
      hdr.veclinear.count = second_count;
      rmesa->hw.vpp[1].cmd[VPP_CMD_0] = hdr.i;
   }
   return GL_TRUE;
}
/**
 * Build the pull constant buffer for a shader stage in temporary upload
 * space and create the SURFACE_STATE that references it.
 *
 * Pull constants are GLSL uniforms (and other constant data) that did
 * not fit as push constants, or that are accessed with a variable array
 * index (pull constants are the easiest way to support that, and they
 * avoid filling register space with mostly-unused data).
 *
 * Compare with brw_curbe.c for gen4/5 push constants and with
 * gen6_vs_state.c for gen6+ push constants.
 *
 * \param brw               driver context
 * \param brw_new_constbuf  dirty bit(s) OR'ed into NewDriverState when
 *                          the surface binding changes
 * \param prog              program whose parameter list is refreshed
 * \param stage_state       per-stage state holding surf_offset[]
 * \param prog_data         compiled program data (pull_param table)
 * \param dword_pitch       forwarded to brw_create_constant_surface()
 */
void
brw_upload_pull_constants(struct brw_context *brw,
                          GLbitfield brw_new_constbuf,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          const struct brw_stage_prog_data *prog_data,
                          bool dword_pitch)
{
   const uint32_t surf_index = prog_data->binding_table.pull_constants_start;
   int i;

   if (prog_data->nr_pull_params == 0) {
      /* Nothing to pull: clear a stale surface binding, if any. */
      if (stage_state->surf_offset[surf_index] != 0) {
         stage_state->surf_offset[surf_index] = 0;
         brw->ctx.NewDriverState |= brw_new_constbuf;
      }
      return;
   }

   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   _mesa_load_state_parameters(&brw->ctx, prog->Parameters);

   /* BRW_NEW_*_PROG_DATA | _NEW_PROGRAM_CONSTANTS */
   const uint32_t bytes = prog_data->nr_pull_params * 4;
   drm_intel_bo *bo = NULL;
   uint32_t bo_offset;
   gl_constant_value *dst =
      intel_upload_space(brw, bytes, 64, &bo, &bo_offset);

   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* Snapshot the current value of every pull parameter. */
   for (i = 0; i < prog_data->nr_pull_params; i++)
      dst[i] = *prog_data->pull_param[i];

   /* Debug dump, compiled out. */
   if (0) {
      for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
         const gl_constant_value *vec = &dst[i * 4];
         fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
                 i, vec[0].f, vec[1].f, vec[2].f, vec[3].f);
      }
   }

   brw_create_constant_surface(brw, bo, bo_offset, bytes,
                               &stage_state->surf_offset[surf_index],
                               dword_pitch);

   /* Drop the local reference obtained from the upload space. */
   drm_intel_bo_unreference(bo);

   brw->ctx.NewDriverState |= brw_new_constbuf;
}
static void gen6_upload_wm_push_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); /* CACHE_NEW_WM_PROG */ const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; /* Updates the ParameterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ /* XXX: Should this happen somewhere before to get our state flag set? */ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); if (prog_data->base.nr_params == 0) { brw->wm.base.push_const_size = 0; } else { float *constants; unsigned int i; constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS, prog_data->base.nr_params * sizeof(float), 32, &brw->wm.base.push_const_offset); for (i = 0; i < prog_data->base.nr_params; i++) { constants[i] = *prog_data->base.param[i]; } if (0) { fprintf(stderr, "WM constants:\n"); for (i = 0; i < prog_data->base.nr_params; i++) { if ((i & 7) == 0) fprintf(stderr, "g%d: ", prog_data->first_curbe_grf + i / 8); fprintf(stderr, "%8f ", constants[i]); if ((i & 7) == 7) fprintf(stderr, "\n"); } if ((i & 7) != 0) fprintf(stderr, "\n"); fprintf(stderr, "\n"); } brw->wm.base.push_const_size = ALIGN(prog_data->base.nr_params, 8) / 8; } if (brw->gen >= 7) { gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); } }
/**
 * Refresh the state needed to run fragment programs in the software
 * rasterizer: when there is a current fragment program, reload its
 * parameter list from the current GL state.
 *
 * \param ctx       the GL context
 * \param newState  bitmask of changed state; only referenced by the
 *                  disabled StateFlags filter below
 */
static void
_swrast_update_fragment_program(GLcontext *ctx, GLbitfield newState)
{
   const struct gl_fragment_program *fp = ctx->FragmentProgram._Current;

   if (!fp)
      return;

#if 0
   /* XXX Need a way to trigger the initial loading of parameters
    * even when there's no recent state changes.
    */
   if (fp->Base.Parameters->StateFlags & newState)
#endif
   _mesa_load_state_parameters(ctx, fp->Base.Parameters);
}
static void track_params( struct i915_fragment_program *p ) { GLint i; if (p->nr_params) _mesa_load_state_parameters(p->ctx, p->FragProg.Parameters); for (i = 0; i < p->nr_params; i++) { GLint reg = p->param[i].reg; COPY_4V( p->constant[reg], p->param[i].values ); } p->params_uptodate = 1; p->on_hardware = 0; /* overkill */ }
static void gen6_upload_wm_push_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); /* Updates the ParameterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ /* XXX: Should this happen somewhere before to get our state flag set? */ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params != 0) { float *constants; unsigned int i; constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], brw->wm.prog_data->param[i]); } if (0) { printf("WM constants:\n"); for (i = 0; i < brw->wm.prog_data->nr_params; i++) { if ((i & 7) == 0) printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8); printf("%8f ", constants[i]); if ((i & 7) == 7) printf("\n"); } if ((i & 7) != 0) printf("\n"); printf("\n"); } } }
/** * Pass the given program parameters to the graphics pipe as a * constant buffer. * \param shader_type either PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT */ void st_upload_constants( struct st_context *st, struct gl_program_parameter_list *params, unsigned shader_type) { struct pipe_context *pipe = st->pipe; struct pipe_resource **cbuf = &st->state.constants[shader_type]; assert(shader_type == PIPE_SHADER_VERTEX || shader_type == PIPE_SHADER_FRAGMENT || shader_type == PIPE_SHADER_GEOMETRY); /* update constants */ if (params && params->NumParameters) { const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4; _mesa_load_state_parameters(st->ctx, params); /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ pipe_resource_reference(cbuf, NULL ); *cbuf = pipe_buffer_create(pipe->screen, PIPE_BIND_CONSTANT_BUFFER, paramBytes ); if (ST_DEBUG & DEBUG_CONSTANTS) { debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", __FUNCTION__, shader_type, params->NumParameters, params->StateFlags); _mesa_print_parameter_list(params); } /* load Mesa constants into the constant buffer */ pipe_buffer_write(st->pipe, *cbuf, 0, paramBytes, params->ParameterValues); st->pipe->set_constant_buffer(st->pipe, shader_type, 0, *cbuf); } else { st->constants.tracked_state[shader_type].dirty.mesa = 0x0; } }
/** * Pass the given program parameters to the graphics pipe as a * constant buffer. * \param id either PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT */ void st_upload_constants( struct st_context *st, struct gl_program_parameter_list *params, unsigned id) { struct pipe_context *pipe = st->pipe; struct pipe_constant_buffer *cbuf = &st->state.constants[id]; assert(id == PIPE_SHADER_VERTEX || id == PIPE_SHADER_FRAGMENT); /* update constants */ if (params && params->NumParameters) { const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4; _mesa_load_state_parameters(st->ctx, params); /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ pipe_buffer_reference(&cbuf->buffer, NULL ); cbuf->buffer = pipe_buffer_create(pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, paramBytes ); if (0) { printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", __FUNCTION__, id, params->NumParameters, params->StateFlags); _mesa_print_parameter_list(params); } /* load Mesa constants into the constant buffer */ if (cbuf->buffer) st_no_flush_pipe_buffer_write(st, cbuf->buffer, 0, paramBytes, params->ParameterValues); st->pipe->set_constant_buffer(st->pipe, id, 0, cbuf); } else { st->constants.tracked_state[id].dirty.mesa = 0; // st->pipe->set_constant_buffer(st->pipe, id, 0, NULL); } }
/**
 * Write parameter array for the given vertex program into dst.
 *
 * For NV programs the tracked matrices are loaded and external constants
 * are read from ctx->VertexProgram.Parameters; otherwise the program's
 * own parameter list is refreshed from GL state and used as the source.
 *
 * A constant whose source cannot be resolved (an unhandled constant
 * type, or an external constant when the program has no parameter list)
 * is written as (0, 0, 0, 0) instead of dereferencing a null pointer.
 *
 * \param ctx  the GL context
 * \param vp   the r300 vertex program
 * \param dst  destination array; must hold at least
 *             4 * vp->code.constants.Count floats
 * \return the total number of components written.
 */
static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
{
	static const float zero_vec[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
	int i;

	if (vp->Base->IsNVProgram) {
		_mesa_load_tracked_matrices(ctx);
	} else {
		if (vp->Base->Base.Parameters) {
			_mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
		}
	}

	for(i = 0; i < vp->code.constants.Count; ++i) {
		const float * src = 0;
		const struct rc_constant * constant = &vp->code.constants.Constants[i];

		switch(constant->Type) {
		case RC_CONSTANT_EXTERNAL:
			if (vp->Base->IsNVProgram) {
				src = ctx->VertexProgram.Parameters[constant->u.External];
			} else if (vp->Base->Base.Parameters) {
				/* Parameters may be absent (see the check above). */
				src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
			}
			break;

		case RC_CONSTANT_IMMEDIATE:
			src = constant->u.Immediate;
			break;

		default:
			/* Unhandled constant type: leave src unresolved. */
			break;
		}

		/* Never read through a null source pointer. */
		if (!src)
			src = zero_vec;

		dst[4*i] = src[0];
		dst[4*i + 1] = src[1];
		dst[4*i + 2] = src[2];
		dst[4*i + 3] = src[3];
	}

	return 4 * vp->code.constants.Count;
}
/** * Print all of a program's parameters/fields to given file. */ static void _mesa_fprint_program_parameters(FILE *f, struct gl_context *ctx, const struct gl_program *prog) { GLuint i; fprintf(f, "InputsRead: 0x%llx (0b%s)\n", (unsigned long long) prog->InputsRead, binary(prog->InputsRead)); fprintf(f, "OutputsWritten: 0x%llx (0b%s)\n", (unsigned long long)prog->OutputsWritten, binary(prog->OutputsWritten)); fprintf(f, "NumInstructions=%d\n", prog->NumInstructions); fprintf(f, "NumTemporaries=%d\n", prog->NumTemporaries); fprintf(f, "NumParameters=%d\n", prog->NumParameters); fprintf(f, "NumAttributes=%d\n", prog->NumAttributes); fprintf(f, "NumAddressRegs=%d\n", prog->NumAddressRegs); fprintf(f, "IndirectRegisterFiles: 0x%x (0b%s)\n", prog->IndirectRegisterFiles, binary(prog->IndirectRegisterFiles)); fprintf(f, "SamplersUsed: 0x%x (0b%s)\n", prog->SamplersUsed, binary(prog->SamplersUsed)); fprintf(f, "Samplers=[ "); for (i = 0; i < MAX_SAMPLERS; i++) { fprintf(f, "%d ", prog->SamplerUnits[i]); } fprintf(f, "]\n"); _mesa_load_state_parameters(ctx, prog->Parameters); #if 0 fprintf(f, "Local Params:\n"); for (i = 0; i < MAX_PROGRAM_LOCAL_PARAMS; i++){ const GLfloat *p = prog->LocalParams[i]; fprintf(f, "%2d: %f, %f, %f, %f\n", i, p[0], p[1], p[2], p[3]); } #endif _mesa_print_parameter_list(prog->Parameters); }
/**
 * This function executes vertex programs.
 *
 * The current vertex program is run once per vertex of the TNL vertex
 * buffer through _mesa_execute_program().  Program outputs are stored in
 * the stage's result vectors, the vertex buffer's attribute pointers are
 * redirected to those results, and finally NDC / cliptest processing is
 * performed.
 *
 * \param ctx    the GL context
 * \param stage  the pipeline stage owning the vp_stage_data storage
 * \return GL_TRUE immediately when there is no current vertex program;
 *         otherwise the result of do_ndc_cliptest().
 */
static GLboolean
run_vp( struct gl_context *ctx, struct tnl_pipeline_stage *stage )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vp_stage_data *store = VP_STAGE_DATA(stage);
   struct vertex_buffer *VB = &tnl->vb;
   struct gl_vertex_program *program = ctx->VertexProgram._Current;
   struct gl_program_machine *machine = &store->machine;
   GLuint outputs[VARYING_SLOT_MAX], numOutputs;
   GLuint i, j;

   if (!program)
      return GL_TRUE;

   /* ARB program or vertex shader */
   _mesa_load_state_parameters(ctx, program->Base.Parameters);

   /* make list of outputs to save some time below */
   numOutputs = 0;
   for (i = 0; i < VARYING_SLOT_MAX; i++) {
      if (program->Base.OutputsWritten & BITFIELD64_BIT(i)) {
         outputs[numOutputs++] = i;
      }
   }

   /* Allocate result vectors.  We delay this until now to avoid allocating
    * memory that would never be used if we don't run the software tnl pipeline.
    */
   if (!store->results[0].storage) {
      for (i = 0; i < VARYING_SLOT_MAX; i++) {
         assert(!store->results[i].storage);
         _mesa_vector4f_alloc( &store->results[i], 0, VB->Size, 32 );
         store->results[i].size = 4;
      }
   }

   map_textures(ctx, program);

   /* Run the program once for every vertex in the buffer. */
   for (i = 0; i < VB->Count; i++) {
      GLuint attr;

      init_machine(ctx, machine, tnl->CurInstance);

#if 0
      printf("Input %d: %f, %f, %f, %f\n", i,
             VB->AttribPtr[0]->data[i][0],
             VB->AttribPtr[0]->data[i][1],
             VB->AttribPtr[0]->data[i][2],
             VB->AttribPtr[0]->data[i][3]);
      printf(" color: %f, %f, %f, %f\n",
             VB->AttribPtr[3]->data[i][0],
             VB->AttribPtr[3]->data[i][1],
             VB->AttribPtr[3]->data[i][2],
             VB->AttribPtr[3]->data[i][3]);
      printf(" normal: %f, %f, %f, %f\n",
             VB->AttribPtr[2]->data[i][0],
             VB->AttribPtr[2]->data[i][1],
             VB->AttribPtr[2]->data[i][2],
             VB->AttribPtr[2]->data[i][3]);
#endif

      /* the vertex array case: load every attribute the program reads */
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
         if (program->Base.InputsRead & BITFIELD64_BIT(attr)) {
            const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
            const GLuint size = VB->AttribPtr[attr]->size;
            const GLuint stride = VB->AttribPtr[attr]->stride;
            /* Address of vertex i's data for this attribute. */
            const GLfloat *data = (GLfloat *) (ptr + stride * i);
#ifdef NAN_CHECK
            check_float(data[0]);
            check_float(data[1]);
            check_float(data[2]);
            check_float(data[3]);
#endif
            COPY_CLEAN_4V(machine->VertAttribs[attr], size, data);
         }
      }

      /* execute the program */
      _mesa_execute_program(ctx, &program->Base, machine);

      /* copy the output registers into the VB->attribs arrays */
      for (j = 0; j < numOutputs; j++) {
         const GLuint attr = outputs[j];
#ifdef NAN_CHECK
         check_float(machine->Outputs[attr][0]);
         check_float(machine->Outputs[attr][1]);
         check_float(machine->Outputs[attr][2]);
         check_float(machine->Outputs[attr][3]);
#endif
         COPY_4V(store->results[attr].data[i], machine->Outputs[attr]);
      }

      /* FOGC is a special case.  Fragment shader expects (f,0,0,1) */
      if (program->Base.OutputsWritten & BITFIELD64_BIT(VARYING_SLOT_FOGC)) {
         store->results[VARYING_SLOT_FOGC].data[i][1] = 0.0;
         store->results[VARYING_SLOT_FOGC].data[i][2] = 0.0;
         store->results[VARYING_SLOT_FOGC].data[i][3] = 1.0;
      }
#ifdef NAN_CHECK
      ASSERT(machine->Outputs[0][3] != 0.0F);
#endif
#if 0
      printf("HPOS: %f %f %f %f\n",
             machine->Outputs[0][0],
             machine->Outputs[0][1],
             machine->Outputs[0][2],
             machine->Outputs[0][3]);
#endif
   }

   unmap_textures(ctx, program);

   if (program->IsPositionInvariant) {
      /* We need the exact same transform as in the fixed function path here
       * to guarantee invariance, depending on compiler optimization flags
       * results could be different otherwise.
       */
      VB->ClipPtr = TransformRaw( &store->results[0],
                                  &ctx->_ModelProjectMatrix,
                                  VB->AttribPtr[0] );

      /* Drivers expect this to be clean to element 4... */
      switch (VB->ClipPtr->size) {
      case 1:
         /* impossible */
      case 2:
         _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 2 );
         /* fall-through */
      case 3:
         _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 3 );
         /* fall-through */
      case 4:
         break;
      }
   }
   else {
      /* Setup the VB pointers so that the next pipeline stages get
       * their data from the right place (the program output arrays).
       */
      VB->ClipPtr = &store->results[VARYING_SLOT_POS];
      VB->ClipPtr->size = 4;
      VB->ClipPtr->count = VB->Count;
   }

   /* Redirect the remaining attribute pointers to the program results. */
   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VARYING_SLOT_COL0];
   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VARYING_SLOT_COL1];
   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VARYING_SLOT_FOGC];
   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VARYING_SLOT_PSIZ];
   VB->BackfaceColorPtr = &store->results[VARYING_SLOT_BFC0];
   VB->BackfaceSecondaryColorPtr = &store->results[VARYING_SLOT_BFC1];

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
         = &store->results[VARYING_SLOT_TEX0 + i];
   }

   for (i = 0; i < ctx->Const.MaxVarying; i++) {
      if (program->Base.OutputsWritten & BITFIELD64_BIT(VARYING_SLOT_VAR0 + i)) {
         /* Note: varying results get put into the generic attributes */
         VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
            = &store->results[VARYING_SLOT_VAR0 + i];
      }
   }

   /* Perform NDC and cliptest operations:
    */
   return do_ndc_cliptest(ctx, store);
}
/**
 * Gathers together all the uniform values into a block of memory to be
 * uploaded into the CURBE, then emits the state packet telling the hardware
 * the new location.
 *
 * The block is laid out in three optional sections (fragment shader
 * constants, clipper constants, vertex shader constants), each starting at
 * its own curbe.*_start offset scaled by 16 values.  When the total size is
 * zero, nothing is gathered and a CONSTANT_BUFFER packet without a buffer
 * is emitted.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_CURBE_OFFSETS */
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   gl_constant_value *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   /* Nothing to gather: go straight to packet emission. */
   if (sz == 0) {
      goto emit;
   }

   buf = intel_upload_space(brw, bufsz, 64,
                            &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);

   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      _mesa_load_state_parameters(ctx, brw->fragment_program->Base.Parameters);

      /* BRW_NEW_CURBE_OFFSETS */
      GLuint offset = brw->curbe.wm_start * 16;

      /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->wm.prog_data->base.nr_params; i++) {
         buf[offset + i] = *brw->wm.prog_data->base.param[i];
      }
   }

   /* clipper constants */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
         buf[offset + i * 4 + 0].f = fixed_plane[i][0];
         buf[offset + i * 4 + 1].f = fixed_plane[i][1];
         buf[offset + i * 4 + 2].f = fixed_plane[i][2];
         buf[offset + i * 4 + 3].f = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       *
       * `i` carries over from the loop above (it is 6 here), so enabled
       * user planes are packed directly after the six fixed planes.
       */
      clip_planes = brw_select_clip_planes(ctx);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
         if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
            buf[offset + i * 4 + 0].f = clip_planes[j][0];
            buf[offset + i * 4 + 1].f = clip_planes[j][1];
            buf[offset + i * 4 + 2].f = clip_planes[j][2];
            buf[offset + i * 4 + 3].f = clip_planes[j][3];
            i++;
         }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      _mesa_load_state_parameters(ctx, brw->vertex_program->Base.Parameters);

      GLuint offset = brw->curbe.vs_start * 16;

      /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->vs.prog_data->base.base.nr_params; i++) {
         buf[offset + i] = *brw->vs.prog_data->base.base.param[i];
      }
   }

   /* Debug dump of the gathered block, compiled out. */
   if (0) {
      for (i = 0; i < sz*16; i+=4)
         fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                 buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
   }

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */

emit:
   /* Work around mysterious 965 hangs that appear to happen if you do
    * two 3DPRIMITIVEs with only a CONSTANT_BUFFER inbetween.  If we
    * haven't already flushed for some other reason, explicitly do so.
    *
    * We've found no documented reason why this should be necessary.
    */
   if (brw->gen == 4 && !brw->is_g4x &&
       (brw->ctx.NewDriverState & (BRW_NEW_BATCH | BRW_NEW_PSP)) == 0) {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
    * (CONSTANT_BUFFER (CURBE Load)):
    *
    *     "Modifying the CS URB allocation via URB_FENCE invalidates any
    *      previous CURBE entries. Therefore software must subsequently
    *      [re]issue a CONSTANT_BUFFER command before CURBE data can be used
    *      in the pipeline."
    */
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      /* No constants: emit the packet with a zero second dword. */
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      /* Bit 8 is set only on this path, where a buffer is supplied; the
       * relocation delta combines (total_size - 1) with the offset of the
       * data inside curbe_bo.
       */
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
                I915_GEM_DOMAIN_INSTRUCTION, 0,
                (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
/* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void upload_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; GLuint sz = brw->curbe.total_size; GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; /* Update our own dependency flags. This works because this * function will also be called whenever fp or vp changes. */ brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); brw->curbe.tracked_state.dirty.mesa |= vp->param_state; brw->curbe.tracked_state.dirty.mesa |= fp->param_state; if (sz == 0) { struct brw_constant_buffer cb; cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 0; cb.bits0.buffer_length = 0; cb.bits0.buffer_address = 0; BRW_BATCH_STRUCT(brw, &cb); if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } return; } buf = (GLfloat *)malloc(bufsz); memset(buf, 0, bufsz); if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = brw->wm.prog_data->param[i][0]; } /* The clipplanes are actually delivered to both CLIP and VS units. * VS uses them to calculate the outcode bitmasks. 
*/ if (brw->curbe.clip_size) { GLuint offset = brw->curbe.clip_start * 16; GLuint j; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0] = fixed_plane[i][0]; buf[offset + i * 4 + 1] = fixed_plane[i][1]; buf[offset + i * 4 + 2] = fixed_plane[i][2]; buf[offset + i * 4 + 3] = fixed_plane[i][3]; } /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to * clip-space: */ assert(MAX_CLIP_PLANES == 6); for (j = 0; j < MAX_CLIP_PLANES; j++) { if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) { buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0]; buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1]; buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2]; buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3]; i++; } } } if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = vp->program.Base.Parameters->NumParameters; _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); for (i = 0; i < nr; i++) { buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; } } if (0) { for (i = 0; i < sz*16; i+=4) _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); /* return; */ } else { if (brw->curbe.last_buf) free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (!brw_pool_alloc(pool, bufsz, 6, &brw->curbe.gs_offset)) { _mesa_printf("out of GS memory for curbe\n"); assert(0); return; } /* Copy data to the buffer: */ bmBufferSubDataAUB(&brw->intel, pool->buffer, brw->curbe.gs_offset, bufsz, buf, DW_CONSTANT_BUFFER, 0); } /* TODO: only emit the constant_buffer packet when necessary, ie: - contents have changed - offset has changed - hw requirements due to other packets emitted. */ { struct brw_constant_buffer cb; memset(&cb, 0, sizeof(cb)); cb.header.opcode = CMD_CONST_BUFFER; cb.header.length = sizeof(cb)/4 - 2; cb.header.valid = 1; cb.bits0.buffer_length = sz - 1; cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ /* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */ BRW_BATCH_STRUCT(brw, &cb); /* intel_batchbuffer_align(brw->intel.batch, 64, 0); */ } }
/**
 * Creates a region containing the push constants for the CS on gen7+.
 *
 * Push constants are constant values (such as GLSL uniforms) that are
 * pre-loaded into a shader stage's register space at thread spawn time.
 *
 * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
 * equivalent gen4/5 code and gen6_vs_state.c:gen6_upload_push_constants for
 * gen6+.
 *
 * The region holds one cross-thread section followed by one per-thread
 * section for each of cs_prog_data->threads threads.  In the per-thread
 * section, the parameter at thread_local_id_index is replaced by
 * t * simd_size for thread t.
 *
 * \param brw           driver context
 * \param prog          program whose parameter list is refreshed
 * \param cs_prog_data  compute program data (push layout, thread count)
 * \param stage_state   receives push_const_offset and push_const_size
 * \param type          AUB state type passed to brw_state_batch()
 */
static void
brw_upload_cs_push_constants(struct brw_context *brw,
                             const struct gl_program *prog,
                             const struct brw_cs_prog_data *cs_prog_data,
                             struct brw_stage_state *stage_state,
                             enum aub_state_struct_type type)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_stage_prog_data *prog_data =
      (struct brw_stage_prog_data*) cs_prog_data;

   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   /* XXX: Should this happen somewhere before to get our state flag set? */
   _mesa_load_state_parameters(ctx, prog->Parameters);

   /* No push constants at all: record a zero size and stop. */
   if (cs_prog_data->push.total.size == 0) {
      stage_state->push_const_size = 0;
      return;
   }

   gl_constant_value *param = (gl_constant_value*)
      brw_state_batch(brw, type, ALIGN(cs_prog_data->push.total.size, 64),
                      64, &stage_state->push_const_offset);
   assert(param);

   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* Cross-thread section: the first push.cross_thread.dwords parameters,
    * copied once at the start of the region.
    */
   if (cs_prog_data->push.cross_thread.size > 0) {
      gl_constant_value *param_copy = param;
      /* The thread-local ID must not live in the cross-thread section. */
      assert(cs_prog_data->thread_local_id_index < 0 ||
             cs_prog_data->thread_local_id_index >=
                cs_prog_data->push.cross_thread.dwords);
      for (unsigned i = 0;
           i < cs_prog_data->push.cross_thread.dwords;
           i++) {
         param_copy[i] = *prog_data->param[i];
      }
   }

   /* Per-thread section: the remaining parameters, repeated per thread. */
   gl_constant_value thread_id;
   if (cs_prog_data->push.per_thread.size > 0) {
      for (unsigned t = 0; t < cs_prog_data->threads; t++) {
         /* Destination index of thread t's block: register count times 8
          * (presumably 8 dwords per register -- confirm).
          */
         unsigned dst =
            8 * (cs_prog_data->push.per_thread.regs * t +
                 cs_prog_data->push.cross_thread.regs);
         unsigned src = cs_prog_data->push.cross_thread.dwords;
         for ( ; src < prog_data->nr_params; src++, dst++) {
            if (src != cs_prog_data->thread_local_id_index)
               param[dst] = *prog_data->param[src];
            else {
               /* Substitute this thread's base ID for the placeholder. */
               thread_id.u = t * cs_prog_data->simd_size;
               param[dst] = thread_id;
            }
         }
      }
   }

   stage_state->push_const_size =
      cs_prog_data->push.cross_thread.regs +
      cs_prog_data->push.per_thread.regs;
}
GLboolean r700SetupFragmentProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); struct r700_fragment_program *fp = (struct r700_fragment_program *) (ctx->FragmentProgram._Current); r700_AssemblerBase *pAsm = &(fp->r700AsmCode); struct gl_fragment_program *mesa_fp = &(fp->mesa_program); struct gl_program_parameter_list *paramList; unsigned int unNumParamData; unsigned int ui, i; unsigned int unNumOfReg; unsigned int unBit; GLuint exportCount; GLboolean point_sprite = GL_FALSE; if(GL_FALSE == fp->loaded) { if(fp->r700Shader.bNeedsAssembly == GL_TRUE) { Assemble( &(fp->r700Shader) ); } /* Load fp to gpu */ r600EmitShader(ctx, &(fp->shaderbo), (GLvoid *)(fp->r700Shader.pProgram), fp->r700Shader.uShaderBinaryDWORDSize, "FS"); fp->loaded = GL_TRUE; } DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram), fp->r700Shader.uShaderBinaryDWORDSize); /* TODO : enable this after MemUse fixed *= (context->chipobj.MemUse)(context, fp->shadercode.buf->id); */ R600_STATECHANGE(context, ps); r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0; SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */ R600_STATECHANGE(context, spi); unNumOfReg = fp->r700Shader.nRegs + 1; ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift); /* PS uses fragment.position */ if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) { ui += 1; SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask); SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } else { CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } if (mesa_fp->Base.InputsRead & (1 << 
FRAG_ATTRIB_FACE)) { ui += 1; SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit); SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask); } else { CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); } /* see if we need any point_sprite replacements, also increase num_interp * as there's no vp output for them */ if (ctx->Point.PointSprite) { for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++) { if (ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE) { ui++; point_sprite = GL_TRUE; } } } if( mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) ui++; if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite) { SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask); SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask); SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask); SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask); /* Like e.g. viewport and winding, point sprite coordinates are * inverted when rendering to FBO. */ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == !ctx->DrawBuffer->Name) SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit); else CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit); } else { CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit); } ui = (unNumOfReg < ui) ? 
ui : unNumOfReg; SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit); if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */ { SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize, STACK_SIZE_shift, STACK_SIZE_mask); } SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode, EXPORT_MODE_shift, EXPORT_MODE_mask); // emit ps input map struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++) r700->SPI_PS_INPUT_CNTL[ui].u32All = 0; unBit = 1 << FRAG_ATTRIB_WPOS; if(mesa_fp->Base.InputsRead & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); else CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } unBit = 1 << VERT_RESULT_COL0; if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); else CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } unBit = 1 << VERT_RESULT_COL1; if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, 
FLAT_SHADE_bit); else CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } unBit = 1 << VERT_RESULT_FOGC; if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); else CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } for(i=0; i<8; i++) { GLboolean coord_replace = ctx->Point.PointSprite && ctx->Point.CoordReplace[i]; unBit = 1 << (VERT_RESULT_TEX0 + i); if ((OutputsWritten & unBit) || coord_replace) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); /* ARB_point_sprite */ if (coord_replace) { SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); } } } unBit = 1 << FRAG_ATTRIB_FACE; if(mesa_fp->Base.InputsRead & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); else CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } unBit = 1 << FRAG_ATTRIB_PNTC; if(mesa_fp->Base.InputsRead & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); else CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); 
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); } for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) { unBit = 1 << i; if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); else CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } } exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1)) { R600_STATECHANGE(context, cb); r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; } /* sent out shader constants. */ paramList = fp->mesa_program.Base.Parameters; if(NULL != paramList) { _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) return GL_FALSE; R600_STATECHANGE(context, ps_consts); r700->ps.num_consts = paramList->NumParameters; unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; } /* Load fp constants to gpu */ if( (GL_TRUE == r700->bShaderUseMemConstant) && (unNumParamData > 0) ) { r600EmitShader(ctx, &(fp->constbo0), (GLvoid *)&(paramList->ParameterValues[0][0]), unNumParamData * 4, "FS Const"); } } else r700->ps.num_consts = 0; COMPILED_SUB * pCompiledSub; GLuint uj; GLuint unConstOffset = r700->ps.num_consts; for(ui=0; ui<pAsm->unNumPresub; ui++) { pCompiledSub = pAsm->presubs[ui].pCompiledSub; r700->ps.num_consts += pCompiledSub->NumParameters; for(uj=0; uj<pCompiledSub->NumParameters; uj++) { 
r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; } unConstOffset += pCompiledSub->NumParameters; } return GL_TRUE; }
/** * Gathers together all the uniform values into a block of memory to be * uploaded into the CURBE, then emits the state packet telling the hardware * the new location. */ static void brw_upload_constant_buffer(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_CURBE_OFFSETS */ const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); gl_constant_value *buf; GLuint i; gl_clip_plane *clip_planes; if (sz == 0) { goto emit; } buf = intel_upload_space(brw, bufsz, 64, &brw->curbe.curbe_bo, &brw->curbe.curbe_offset); STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); /* fragment shader constants */ if (brw->curbe.wm_size) { _mesa_load_state_parameters(ctx, brw->fragment_program->Parameters); /* BRW_NEW_CURBE_OFFSETS */ GLuint offset = brw->curbe.wm_start * 16; /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */ for (i = 0; i < brw->wm.base.prog_data->nr_params; i++) { buf[offset + i] = *brw->wm.base.prog_data->param[i]; } } /* clipper constants */ if (brw->curbe.clip_size) { GLuint offset = brw->curbe.clip_start * 16; GLbitfield mask; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0].f = fixed_plane[i][0]; buf[offset + i * 4 + 1].f = fixed_plane[i][1]; buf[offset + i * 4 + 2].f = fixed_plane[i][2]; buf[offset + i * 4 + 3].f = fixed_plane[i][3]; } /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to * clip-space: */ clip_planes = brw_select_clip_planes(ctx); mask = ctx->Transform.ClipPlanesEnabled; while (mask) { const int j = u_bit_scan(&mask); buf[offset + i * 4 + 0].f = clip_planes[j][0]; buf[offset + i * 4 + 1].f = clip_planes[j][1]; buf[offset + i * 4 + 2].f = clip_planes[j][2]; buf[offset + i * 4 + 3].f = clip_planes[j][3]; i++; } } /* vertex shader constants */ if (brw->curbe.vs_size) { _mesa_load_state_parameters(ctx, brw->vertex_program->Parameters); GLuint offset = brw->curbe.vs_start * 16; /* 
BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */ for (i = 0; i < brw->vs.base.prog_data->nr_params; i++) { buf[offset + i] = *brw->vs.base.prog_data->param[i]; } } if (0) { for (i = 0; i < sz*16; i+=4) fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f); } /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ emit: /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8 * (CONSTANT_BUFFER (CURBE Load)): * * "Modifying the CS URB allocation via URB_FENCE invalidates any * previous CURBE entries. Therefore software must subsequently * [re]issue a CONSTANT_BUFFER command before CURBE data can be used * in the pipeline." */ BEGIN_BATCH(2); if (brw->curbe.total_size == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); OUT_BATCH(0); } else { OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); OUT_RELOC(brw->curbe.curbe_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, (brw->curbe.total_size - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); /* Work around a Broadwater/Crestline depth interpolator bug. The * following sequence will cause GPU hangs: * * 1. Change state so that all depth related fields in CC_STATE are * disabled, and in WM_STATE, only "PS Use Source Depth" is enabled. * 2. Emit a CONSTANT_BUFFER packet. * 3. Draw via 3DPRIMITIVE. * * The recommended workaround is to emit a non-pipelined state change after * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline. 
* * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small), * and always emit it when "PS Use Source Depth" is set. We could be more * precise, but the additional complexity is probably not worth it. * * BRW_NEW_FRAGMENT_PROGRAM */ if (brw->gen == 4 && !brw->is_g4x && (brw->fragment_program->info.inputs_read & (1 << VARYING_SLOT_POS))) { BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2)); OUT_BATCH(0); ADVANCE_BATCH(); } }
GLboolean evergreenSetupVPconstants(struct gl_context * ctx) { context_t *context = EVERGREEN_CONTEXT(ctx); EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context); struct evergreen_vertex_program *vp = (struct evergreen_vertex_program *) context->selected_vp; struct gl_program_parameter_list *paramList; unsigned int unNumParamData; unsigned int ui; int alloc_size; /* sent out shader constants. */ paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { /* vp->mesa_program was cloned, not updated by glsl shader api. */ /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */ /* so, use ctx->VertexProgem._Current */ struct gl_program_parameter_list *paramListOrginal = ctx->VertexProgram._Current->Base.Parameters; _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > EVERGREEN_MAX_DX9_CONSTS) return GL_FALSE; EVERGREEN_STATECHANGE(context, vs); evergreen->vs.num_consts = paramList->NumParameters; unNumParamData = paramList->NumParameters; /* alloc multiple of 16 constants */ alloc_size = ((unNumParamData * 4 * 4) + 255) & ~255; for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } radeonAllocDmaRegion(&context->radeon, &context->vp_Constbo, &context->vp_bo_offset, alloc_size, 256); r600EmitShaderConsts(ctx, 
context->vp_Constbo, context->vp_bo_offset, (GLvoid *)&(evergreen->vs.consts[0][0]), unNumParamData * 4 * 4); } else evergreen->vs.num_consts = 0; COMPILED_SUB * pCompiledSub; GLuint uj; GLuint unConstOffset = evergreen->vs.num_consts; for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++) { pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; evergreen->vs.num_consts += pCompiledSub->NumParameters; for(uj=0; uj<pCompiledSub->NumParameters; uj++) { evergreen->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; evergreen->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; evergreen->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; evergreen->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; } unConstOffset += pCompiledSub->NumParameters; } return GL_TRUE; }
/**
 * Pass the given program parameters to the graphics pipe as a
 * constant buffer.
 *
 * If \p params is NULL or empty, any constant buffer previously bound for
 * the stage is unbound instead.
 *
 * \param st      state tracker context
 * \param params  parameter list to upload; may be NULL
 * \param stage   Mesa shader stage; converted with
 *                st_shader_stage_to_ptarget() to the pipe shader type
 */
void st_upload_constants( struct st_context *st,
                          struct gl_program_parameter_list *params,
                          gl_shader_stage stage)
{
   enum pipe_shader_type shader_type = st_shader_stage_to_ptarget(stage);

   assert(shader_type == PIPE_SHADER_VERTEX ||
          shader_type == PIPE_SHADER_FRAGMENT ||
          shader_type == PIPE_SHADER_GEOMETRY ||
          shader_type == PIPE_SHADER_TESS_CTRL ||
          shader_type == PIPE_SHADER_TESS_EVAL ||
          shader_type == PIPE_SHADER_COMPUTE);

   /* update the ATI constants before rendering.
    *
    * params is allowed to be NULL (see the check below), so it must be
    * tested here as well before its ParameterValues are written.
    */
   if (params && shader_type == PIPE_SHADER_FRAGMENT && st->fp->ati_fs) {
      struct ati_fragment_shader *ati_fs = st->fp->ati_fs;
      unsigned c;

      for (c = 0; c < MAX_NUM_FRAGMENT_CONSTANTS_ATI; c++) {
         /* A constant defined locally by the shader takes precedence over
          * the context's global constant of the same index.
          */
         if (ati_fs->LocalConstDef & (1 << c)) {
            memcpy(params->ParameterValues[c],
                   ati_fs->Constants[c], sizeof(GLfloat) * 4);
         } else {
            memcpy(params->ParameterValues[c],
                   st->ctx->ATIFragmentShader.GlobalConstants[c],
                   sizeof(GLfloat) * 4);
         }
      }
   }

   /* update constants */
   if (params && params->NumParameters) {
      struct pipe_constant_buffer cb;
      const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4;

      /* Update the constants which come from fixed-function state, such as
       * transformation matrices, fog factors, etc.  The rest of the values in
       * the parameters list are explicitly set by the user with glUniform,
       * glProgramParameter(), etc.
       */
      if (params->StateFlags)
         _mesa_load_state_parameters(st->ctx, params);

      _mesa_shader_write_subroutine_indices(st->ctx, stage);

      /* We always need to get a new buffer, to keep the drivers simple and
       * avoid gratuitous rendering synchronization.
       * Let's use a user buffer to avoid an unnecessary copy.
       */
      if (st->constbuf_uploader) {
         cb.buffer = NULL;
         cb.user_buffer = NULL;
         u_upload_data(st->constbuf_uploader, 0, paramBytes,
                       st->ctx->Const.UniformBufferOffsetAlignment,
                       params->ParameterValues, &cb.buffer_offset,
                       &cb.buffer);
         u_upload_unmap(st->constbuf_uploader);
      } else {
         cb.buffer = NULL;
         cb.user_buffer = params->ParameterValues;
         cb.buffer_offset = 0;
      }
      cb.buffer_size = paramBytes;

      if (ST_DEBUG & DEBUG_CONSTANTS) {
         debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n",
                      __func__, shader_type, params->NumParameters,
                      params->StateFlags);
         _mesa_print_parameter_list(params);
      }

      cso_set_constant_buffer(st->cso_context, shader_type, 0, &cb);
      /* Drop the local reference to the uploaded buffer. */
      pipe_resource_reference(&cb.buffer, NULL);

      /* Remember what is bound so the unbind path below can tell. */
      st->state.constants[shader_type].ptr = params->ParameterValues;
      st->state.constants[shader_type].size = paramBytes;
   }
   else if (st->state.constants[shader_type].ptr) {
      /* Unbind. */
      st->state.constants[shader_type].ptr = NULL;
      st->state.constants[shader_type].size = 0;
      cso_set_constant_buffer(st->cso_context, shader_type, 0, NULL);
   }
}
static void upload_vs_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); unsigned int nr_params = vp->program.Base.Parameters->NumParameters; drm_intel_bo *constant_bo; int i; if (vp->use_const_buffer || nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } else { if (brw->vertex_program->IsNVProgram) _mesa_load_tracked_matrices(ctx); /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", nr_params * 4 * sizeof(float), 4096); drm_intel_gem_bo_map_gtt(constant_bo); for (i = 0; i < nr_params; i++) { memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } drm_intel_gem_bo_unmap_gtt(constant_bo); BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); OUT_RELOC(constant_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ ALIGN(nr_params, 2) / 2 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); drm_intel_bo_unreference(constant_bo); } intel_batchbuffer_emit_mi_flush(intel->batch); BEGIN_BATCH(6); OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) | 
GEN6_VS_STATISTICS_ENABLE); ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); }
/**
 * This function executes vertex programs.
 *
 * For every vertex in the TNL vertex buffer the program's inputs are loaded
 * into a fresh machine, the program is executed, and the outputs it writes
 * are stored in the stage's result arrays.  The vertex buffer pointers are
 * then redirected to those arrays and NDC/clip testing is performed.
 *
 * \param ctx    GL context
 * \param stage  pipeline stage whose private data holds the result arrays
 * \return GL_TRUE if there is no current vertex program; otherwise the
 *         result of do_ndc_cliptest().
 */
static GLboolean
run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vp_stage_data *store = VP_STAGE_DATA(stage);
   struct vertex_buffer *VB = &tnl->vb;
   struct gl_vertex_program *program = ctx->VertexProgram._Current;
   struct gl_program_machine machine;
   GLuint outputs[VERT_RESULT_MAX], numOutputs;
   GLuint i, j;

   if (!program)
      return GL_TRUE;

   if (program->IsNVProgram) {
      _mesa_load_tracked_matrices(ctx);
   }
   else {
      /* ARB program or vertex shader */
      _mesa_load_state_parameters(ctx, program->Base.Parameters);
   }

   /* make list of outputs to save some time below.
    * Bit tests use an unsigned 1 so that bit 31 can be tested without
    * shifting into the sign bit of an int.
    */
   numOutputs = 0;
   for (i = 0; i < VERT_RESULT_MAX; i++) {
      if (program->Base.OutputsWritten & (1u << i)) {
         outputs[numOutputs++] = i;
      }
   }

   map_textures(ctx, program);

   for (i = 0; i < VB->Count; i++) {
      GLuint attr;

      init_machine(ctx, &machine);

#if 0
      printf("Input  %d: %f, %f, %f, %f\n", i,
             VB->AttribPtr[0]->data[i][0],
             VB->AttribPtr[0]->data[i][1],
             VB->AttribPtr[0]->data[i][2],
             VB->AttribPtr[0]->data[i][3]);
      printf("   color: %f, %f, %f, %f\n",
             VB->AttribPtr[3]->data[i][0],
             VB->AttribPtr[3]->data[i][1],
             VB->AttribPtr[3]->data[i][2],
             VB->AttribPtr[3]->data[i][3]);
      printf("  normal: %f, %f, %f, %f\n",
             VB->AttribPtr[2]->data[i][0],
             VB->AttribPtr[2]->data[i][1],
             VB->AttribPtr[2]->data[i][2],
             VB->AttribPtr[2]->data[i][3]);
#endif

      /* the vertex array case */
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
         if (program->Base.InputsRead & (1u << attr)) {
            const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
            const GLuint size = VB->AttribPtr[attr]->size;
            const GLuint stride = VB->AttribPtr[attr]->stride;
            /* Byte-stride addressing of vertex i within the array. */
            const GLfloat *data = (const GLfloat *) (ptr + stride * i);
            COPY_CLEAN_4V(machine.VertAttribs[attr], size, data);
         }
      }

      /* execute the program */
      _mesa_execute_program(ctx, &program->Base, &machine);

      /* copy the output registers into the VB->attribs arrays */
      for (j = 0; j < numOutputs; j++) {
         const GLuint attr = outputs[j];
         COPY_4V(store->results[attr].data[i], machine.Outputs[attr]);
      }
#if 0
      printf("HPOS: %f %f %f %f\n",
             machine.Outputs[0][0],
             machine.Outputs[0][1],
             machine.Outputs[0][2],
             machine.Outputs[0][3]);
#endif
   }

   unmap_textures(ctx, program);

   /* Fixup fog and point size results if needed */
   if (program->IsNVProgram) {
      if (ctx->Fog.Enabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_FOGC)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_FOGC].data[i][0] = 1.0;
         }
      }

      if (ctx->VertexProgram.PointSizeEnabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_PSIZ)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_PSIZ].data[i][0] = ctx->Point.Size;
         }
      }
   }

   if (program->IsPositionInvariant) {
      /* We need the exact same transform as in the fixed function path here
       * to guarantee invariance, depending on compiler optimization flags
       * results could be different otherwise.
       */
      VB->ClipPtr = TransformRaw( &store->results[0],
                                  &ctx->_ModelProjectMatrix,
                                  VB->AttribPtr[0] );

      /* Drivers expect this to be clean to element 4... */
      switch (VB->ClipPtr->size) {
      case 1:
         /* impossible */
      case 2:
         _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 2 );
         /* fall-through */
      case 3:
         _mesa_vector4f_clean_elem( VB->ClipPtr, VB->Count, 3 );
         /* fall-through */
      case 4:
         break;
      default:
         break;
      }
   }
   else {
      /* Setup the VB pointers so that the next pipeline stages get
       * their data from the right place (the program output arrays).
       */
      VB->ClipPtr = &store->results[VERT_RESULT_HPOS];
      VB->ClipPtr->size = 4;
      VB->ClipPtr->count = VB->Count;
   }

   VB->ColorPtr[0] = &store->results[VERT_RESULT_COL0];
   VB->ColorPtr[1] = &store->results[VERT_RESULT_BFC0];
   VB->SecondaryColorPtr[0] = &store->results[VERT_RESULT_COL1];
   VB->SecondaryColorPtr[1] = &store->results[VERT_RESULT_BFC1];
   VB->FogCoordPtr = &store->results[VERT_RESULT_FOGC];

   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VERT_RESULT_COL0];
   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VERT_RESULT_COL1];
   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VERT_RESULT_FOGC];
   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VERT_RESULT_PSIZ];

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      VB->TexCoordPtr[i] =
      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
         = &store->results[VERT_RESULT_TEX0 + i];
   }

   for (i = 0; i < ctx->Const.MaxVarying; i++) {
      if (program->Base.OutputsWritten & (1u << (VERT_RESULT_VAR0 + i))) {
         /* Note: varying results get put into the generic attributes */
         VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
            = &store->results[VERT_RESULT_VAR0 + i];
      }
   }

   /* Perform NDC and cliptest operations:
    */
   return do_ndc_cliptest(ctx, store);
}
/**
 * This function executes vertex programs.
 *
 * For every vertex in the TNL vertex buffer the program's inputs are loaded
 * into a fresh machine, the program is executed, and the outputs it writes
 * are stored in the stage's result arrays.  The vertex buffer pointers are
 * then redirected to those arrays and the frustum clip test is run.
 *
 * \param ctx    GL context
 * \param stage  pipeline stage whose private data holds the result arrays
 * \return GL_FALSE if the clip test reports that all vertices are outside
 *         the frustum; GL_TRUE otherwise (including when there is no
 *         current vertex program).
 */
static GLboolean
run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vp_stage_data *store = VP_STAGE_DATA(stage);
   struct vertex_buffer *VB = &tnl->vb;
   struct gl_vertex_program *program = ctx->VertexProgram._Current;
   struct gl_program_machine machine;
   GLuint outputs[VERT_RESULT_MAX], numOutputs;
   GLuint i, j;

   if (!program)
      return GL_TRUE;

   if (program->IsNVProgram) {
      _mesa_load_tracked_matrices(ctx);
   }
   else {
      /* ARB program or vertex shader */
      _mesa_load_state_parameters(ctx, program->Base.Parameters);
   }

   /* Build the list of outputs the program writes, so the per-vertex copy
    * below only touches those.  Bit tests use an unsigned 1 so that bit 31
    * can be tested without shifting into the sign bit of an int.
    */
   numOutputs = 0;
   for (i = 0; i < VERT_RESULT_MAX; i++) {
      if (program->Base.OutputsWritten & (1u << i)) {
         outputs[numOutputs++] = i;
      }
   }

   for (i = 0; i < VB->Count; i++) {
      GLuint attr;

      init_machine(ctx, &machine);

#if 0
      printf("Input  %d: %f, %f, %f, %f\n", i,
             VB->AttribPtr[0]->data[i][0],
             VB->AttribPtr[0]->data[i][1],
             VB->AttribPtr[0]->data[i][2],
             VB->AttribPtr[0]->data[i][3]);
      printf("   color: %f, %f, %f, %f\n",
             VB->AttribPtr[3]->data[i][0],
             VB->AttribPtr[3]->data[i][1],
             VB->AttribPtr[3]->data[i][2],
             VB->AttribPtr[3]->data[i][3]);
      printf("  normal: %f, %f, %f, %f\n",
             VB->AttribPtr[2]->data[i][0],
             VB->AttribPtr[2]->data[i][1],
             VB->AttribPtr[2]->data[i][2],
             VB->AttribPtr[2]->data[i][3]);
#endif

      /* the vertex array case */
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
         if (program->Base.InputsRead & (1u << attr)) {
            const GLubyte *ptr = (const GLubyte*) VB->AttribPtr[attr]->data;
            const GLuint size = VB->AttribPtr[attr]->size;
            const GLuint stride = VB->AttribPtr[attr]->stride;
            /* Byte-stride addressing of vertex i within the array. */
            const GLfloat *data = (const GLfloat *) (ptr + stride * i);
            COPY_CLEAN_4V(machine.VertAttribs[attr], size, data);
         }
      }

      /* execute the program */
      _mesa_execute_program(ctx, &program->Base, &machine);

      /* copy the output registers into the VB->attribs arrays */
      for (j = 0; j < numOutputs; j++) {
         const GLuint attr = outputs[j];
         COPY_4V(store->results[attr].data[i], machine.Outputs[attr]);
      }
#if 0
      printf("HPOS: %f %f %f %f\n",
             machine.Outputs[0][0],
             machine.Outputs[0][1],
             machine.Outputs[0][2],
             machine.Outputs[0][3]);
#endif
   }

   /* Fixup fog and point size results if needed */
   if (program->IsNVProgram) {
      if (ctx->Fog.Enabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_FOGC)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_FOGC].data[i][0] = 1.0;
         }
      }

      if (ctx->VertexProgram.PointSizeEnabled &&
          (program->Base.OutputsWritten & (1u << VERT_RESULT_PSIZ)) == 0) {
         for (i = 0; i < VB->Count; i++) {
            store->results[VERT_RESULT_PSIZ].data[i][0] = ctx->Point.Size;
         }
      }
   }

   /* Setup the VB pointers so that the next pipeline stages get
    * their data from the right place (the program output arrays).
    */
   VB->ClipPtr = &store->results[VERT_RESULT_HPOS];
   VB->ClipPtr->size = 4;
   VB->ClipPtr->count = VB->Count;
   VB->ColorPtr[0] = &store->results[VERT_RESULT_COL0];
   VB->ColorPtr[1] = &store->results[VERT_RESULT_BFC0];
   VB->SecondaryColorPtr[0] = &store->results[VERT_RESULT_COL1];
   VB->SecondaryColorPtr[1] = &store->results[VERT_RESULT_BFC1];
   VB->FogCoordPtr = &store->results[VERT_RESULT_FOGC];

   VB->AttribPtr[VERT_ATTRIB_COLOR0] = &store->results[VERT_RESULT_COL0];
   VB->AttribPtr[VERT_ATTRIB_COLOR1] = &store->results[VERT_RESULT_COL1];
   VB->AttribPtr[VERT_ATTRIB_FOG] = &store->results[VERT_RESULT_FOGC];
   VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->results[VERT_RESULT_PSIZ];

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      VB->TexCoordPtr[i] =
      VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]
         = &store->results[VERT_RESULT_TEX0 + i];
   }

   for (i = 0; i < ctx->Const.MaxVarying; i++) {
      if (program->Base.OutputsWritten & (1u << (VERT_RESULT_VAR0 + i))) {
         /* Note: varying results get put into the generic attributes */
         VB->AttribPtr[VERT_ATTRIB_GENERIC0+i]
            = &store->results[VERT_RESULT_VAR0 + i];
      }
   }

   /* Cliptest and perspective divide.  Clip functions must clear
    * the clipmask.
    */
   store->ormask = 0;
   store->andmask = CLIP_FRUSTUM_BITS;

   if (tnl->NeedNdcCoords) {
      VB->NdcPtr =
         _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
                                            &store->ndcCoords,
                                            store->clipmask,
                                            &store->ormask,
                                            &store->andmask );
   }
   else {
      VB->NdcPtr = NULL;
      _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
                                            NULL,
                                            store->clipmask,
                                            &store->ormask,
                                            &store->andmask );
   }

   if (store->andmask)  /* All vertices are outside the frustum */
      return GL_FALSE;

   /* This is where we'd do clip testing against the user-defined
    * clipping planes, but they're not supported by vertex programs.
    */

   VB->ClipOrMask = store->ormask;
   VB->ClipMask = store->clipmask;

   return GL_TRUE;
}
/**
 * Program the hardware state for the selected vertex program.
 *
 * On first use the shader is assembled (when flagged) and emitted.  The VS
 * resource registers and the SPI export/interpolant counts are then
 * written, and the program's parameters are copied into r700->vs.consts.
 *
 * \param ctx  GL context; context->selected_vp is dereferenced.
 * \return GL_TRUE on success, GL_FALSE if the parameter list holds more
 *         than R700_MAX_DX9_CONSTS entries.
 */
GLboolean r700SetupVertexProgram(GLcontext * ctx)
{
    context_t *context = R700_CONTEXT(ctx);
    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
    struct r700_vertex_program *vp = context->selected_vp;
    struct gl_program_parameter_list *params;
    unsigned int numParams;
    unsigned int row;
    unsigned int comp;

    /* One-time upload of the shader binary. */
    if (vp->loaded == GL_FALSE)
    {
        if (vp->r700Shader.bNeedsAssembly == GL_TRUE)
        {
            Assemble( &(vp->r700Shader) );
        }

        /* Load vp to gpu */
        r600EmitShader(ctx,
                       &(vp->shaderbo),
                       (GLvoid *)(vp->r700Shader.pProgram),
                       vp->r700Shader.uShaderBinaryDWORDSize,
                       "VS");

        vp->loaded = GL_TRUE;
    }

    DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
                 vp->r700Shader.uShaderBinaryDWORDSize);

    /* TODO : enable this after MemUse fixed *=
    (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
    */

    R600_STATECHANGE(context, vs);
    R600_STATECHANGE(context, fs); /* hack */

    /* Rebuild SQ_PGM_RESOURCES_VS from scratch. */
    r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
    SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);

    r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */

    SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
             NUM_GPRS_shift, NUM_GPRS_mask);

    /* we don't use branch for now, so the stack size should be zero. */
    if (vp->r700Shader.uStackSize)
    {
        SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
                 STACK_SIZE_shift, STACK_SIZE_mask);
    }

    R600_STATECHANGE(context, spi);

    /* VS_EXPORT_COUNT is stored as (count - 1), with 0 kept as 0. */
    SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
             vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
             VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
    SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
             NUM_INTERP_shift, NUM_INTERP_mask);

    /*
    SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
    CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
    */

    /* sent out shader constants. */
    params = vp->mesa_program->Base.Parameters;

    /* No parameter list: nothing to copy. */
    if (params == NULL)
    {
        r700->vs.num_consts = 0;
        return GL_TRUE;
    }

    _mesa_load_state_parameters(ctx, params);

    if (params->NumParameters > R700_MAX_DX9_CONSTS)
    {
        return GL_FALSE;
    }

    R600_STATECHANGE(context, vs_consts);

    r700->vs.num_consts = params->NumParameters;

    numParams = params->NumParameters;

    /* Copy each four-component parameter into the constant table. */
    for (row = 0; row < numParams; row++)
    {
        for (comp = 0; comp < 4; comp++)
        {
            r700->vs.consts[row][comp].f32All = params->ParameterValues[row][comp];
        }
    }

    return GL_TRUE;
}
GLboolean r700SetupVertexProgram(struct gl_context * ctx) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); struct r700_vertex_program *vp = context->selected_vp; struct gl_program_parameter_list *paramList; unsigned int unNumParamData; unsigned int ui; if(GL_FALSE == vp->loaded) { if(vp->r700Shader.bNeedsAssembly == GL_TRUE) { Assemble( &(vp->r700Shader) ); } /* Load vp to gpu */ r600EmitShader(ctx, &(vp->shaderbo), (GLvoid *)(vp->r700Shader.pProgram), vp->r700Shader.uShaderBinaryDWORDSize, "VS"); if(GL_TRUE == r700->bShaderUseMemConstant) { paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { unNumParamData = paramList->NumParameters; r600AllocShaderConsts(ctx, &(vp->constbo0), unNumParamData *4*4, "VSCON"); } } vp->loaded = GL_TRUE; } DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram), vp->r700Shader.uShaderBinaryDWORDSize); /* TODO : enable this after MemUse fixed *= (context->chipobj.MemUse)(context, vp->shadercode.buf->id); */ R600_STATECHANGE(context, vs); R600_STATECHANGE(context, fs); /* hack */ r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0; SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All = 0; /* set from buffer object. */ r700->vs.SQ_PGM_START_VS.u32All = 0; SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1, NUM_GPRS_shift, NUM_GPRS_mask); if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. 
*/ { SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize, STACK_SIZE_shift, STACK_SIZE_mask); } R600_STATECHANGE(context, spi); if(vp->mesa_program->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { R600_STATECHANGE(context, cl); SETbit(r700->PA_CL_VS_OUT_CNTL.u32All, USE_VTX_POINT_SIZE_bit); SETbit(r700->PA_CL_VS_OUT_CNTL.u32All, VS_OUT_MISC_VEC_ENA_bit); } else if (r700->PA_CL_VS_OUT_CNTL.u32All != 0) { R600_STATECHANGE(context, cl); CLEARbit(r700->PA_CL_VS_OUT_CNTL.u32All, USE_VTX_POINT_SIZE_bit); CLEARbit(r700->PA_CL_VS_OUT_CNTL.u32All, VS_OUT_MISC_VEC_ENA_bit); } SETfield(r700->SPI_VS_OUT_CONFIG.u32All, vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0, VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask); SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports, NUM_INTERP_shift, NUM_INTERP_mask); /* SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit); CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); */ /* sent out shader constants. */ paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { /* vp->mesa_program was cloned, not updated by glsl shader api. 
*/ /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */ /* so, use ctx->VertexProgem._Current */ struct gl_program_parameter_list *paramListOrginal = ctx->VertexProgram._Current->Base.Parameters; _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) return GL_FALSE; R600_STATECHANGE(context, vs_consts); r700->vs.num_consts = paramList->NumParameters; unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; } else { r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; } } /* Load vp constants to gpu */ if(GL_TRUE == r700->bShaderUseMemConstant) { r600EmitShaderConsts(ctx, vp->constbo0, 0, (GLvoid *)&(r700->vs.consts[0][0]), unNumParamData * 4 * 4); } } else r700->vs.num_consts = 0; COMPILED_SUB * pCompiledSub; GLuint uj; GLuint unConstOffset = r700->vs.num_consts; for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++) { pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; r700->vs.num_consts += pCompiledSub->NumParameters; for(uj=0; uj<pCompiledSub->NumParameters; uj++) { r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; r700->vs.consts[uj + unConstOffset][3].f32All = 
pCompiledSub->ParameterValues[uj][3]; } unConstOffset += pCompiledSub->NumParameters; } return GL_TRUE; }
/** * Creates a streamed BO containing the push constants for the VS or GS on * gen6+. * * Push constants are constant values (such as GLSL uniforms) that are * pre-loaded into a shader stage's register space at thread spawn time. * * Not all GLSL uniforms will be uploaded as push constants: The hardware has * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be * uploaded as push constants, while GL 4.4 requires at least 1024 components * to be usable for the VS. Plus, currently we always use pull constants * instead of push constants when doing variable-index array access. * * See brw_curbe.c for the equivalent gen4/5 code. */ void gen6_upload_push_constants(struct brw_context *brw, const struct gl_program *prog, const struct brw_stage_prog_data *prog_data, struct brw_stage_state *stage_state, enum aub_state_struct_type type) { struct gl_context *ctx = &brw->ctx; /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ /* XXX: Should this happen somewhere before to get our state flag set? */ _mesa_load_state_parameters(ctx, prog->Parameters); if (prog_data->nr_params == 0) { stage_state->push_const_size = 0; } else { gl_constant_value *param; int i; param = brw_state_batch(brw, type, prog_data->nr_params * sizeof(gl_constant_value), 32, &stage_state->push_const_offset); STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); /* _NEW_PROGRAM_CONSTANTS * * Also _NEW_TRANSFORM -- we may reference clip planes other than as a * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS * wouldn't be set for them. 
*/ for (i = 0; i < prog_data->nr_params; i++) { param[i] = *prog_data->param[i]; } if (0) { fprintf(stderr, "%s constants:\n", _mesa_shader_stage_to_string(stage_state->stage)); for (i = 0; i < prog_data->nr_params; i++) { if ((i & 7) == 0) fprintf(stderr, "g%d: ", prog_data->dispatch_grf_start_reg + i / 8); fprintf(stderr, "%8f ", param[i].f); if ((i & 7) == 7) fprintf(stderr, "\n"); } if ((i & 7) != 0) fprintf(stderr, "\n"); fprintf(stderr, "\n"); } stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; /* We can only push 32 registers of constants at a time. */ /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS: * * "The sum of all four read length fields (each incremented to * represent the actual read length) must be less than or equal to * 32" * * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS: * * "The sum of all four read length fields must be less than or * equal to the size of 64" * * The other shader stages all match the VS's limits. */ assert(stage_state->push_const_size <= 32); } }
/** * Pass the given program parameters to the graphics pipe as a * constant buffer. */ void st_upload_constants(struct st_context *st, struct gl_program *prog) { gl_shader_stage stage = prog->info.stage; struct gl_program_parameter_list *params = prog->Parameters; enum pipe_shader_type shader_type = pipe_shader_type_from_mesa(stage); assert(shader_type == PIPE_SHADER_VERTEX || shader_type == PIPE_SHADER_FRAGMENT || shader_type == PIPE_SHADER_GEOMETRY || shader_type == PIPE_SHADER_TESS_CTRL || shader_type == PIPE_SHADER_TESS_EVAL || shader_type == PIPE_SHADER_COMPUTE); /* update the ATI constants before rendering */ if (shader_type == PIPE_SHADER_FRAGMENT && st->fp->ati_fs) { struct ati_fragment_shader *ati_fs = st->fp->ati_fs; unsigned c; for (c = 0; c < MAX_NUM_FRAGMENT_CONSTANTS_ATI; c++) { if (ati_fs->LocalConstDef & (1 << c)) memcpy(params->ParameterValues[c], ati_fs->Constants[c], sizeof(GLfloat) * 4); else memcpy(params->ParameterValues[c], st->ctx->ATIFragmentShader.GlobalConstants[c], sizeof(GLfloat) * 4); } } /* Make all bindless samplers/images bound texture/image units resident in * the context. */ st_make_bound_samplers_resident(st, prog); st_make_bound_images_resident(st, prog); /* update constants */ if (params && params->NumParameters) { struct pipe_constant_buffer cb; const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4; /* Update the constants which come from fixed-function state, such as * transformation matrices, fog factors, etc. The rest of the values in * the parameters list are explicitly set by the user with glUniform, * glProgramParameter(), etc. 
*/ if (params->StateFlags) _mesa_load_state_parameters(st->ctx, params); _mesa_shader_write_subroutine_indices(st->ctx, stage); cb.buffer = NULL; cb.user_buffer = params->ParameterValues; cb.buffer_offset = 0; cb.buffer_size = paramBytes; if (ST_DEBUG & DEBUG_CONSTANTS) { debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", __func__, shader_type, params->NumParameters, params->StateFlags); _mesa_print_parameter_list(params); } cso_set_constant_buffer(st->cso_context, shader_type, 0, &cb); pipe_resource_reference(&cb.buffer, NULL); st->state.constants[shader_type].ptr = params->ParameterValues; st->state.constants[shader_type].size = paramBytes; } else if (st->state.constants[shader_type].ptr) { /* Unbind. */ st->state.constants[shader_type].ptr = NULL; st->state.constants[shader_type].size = 0; cso_set_constant_buffer(st->cso_context, shader_type, 0, NULL); } }
/* Upload a new set of constants. Too much variability to go into the * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } return; } buf = (GLfloat *) calloc(1, bufsz); /* fragment shader constants */ if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = *brw->wm.prog_data->param[i]; } /* The clipplanes are actually delivered to both CLIP and VS units. * VS uses them to calculate the outcode bitmasks. 
*/ if (brw->curbe.clip_size) { GLuint offset = brw->curbe.clip_start * 16; GLuint j; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0] = fixed_plane[i][0]; buf[offset + i * 4 + 1] = fixed_plane[i][1]; buf[offset + i * 4 + 2] = fixed_plane[i][2]; buf[offset + i * 4 + 3] = fixed_plane[i][3]; } /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to * clip-space: */ assert(MAX_CLIP_PLANES == 6); for (j = 0; j < MAX_CLIP_PLANES; j++) { if (ctx->Transform.ClipPlanesEnabled & (1<<j)) { buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0]; buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1]; buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2]; buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3]; i++; } } } /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; if (brw->vertex_program->IsNVProgram) _mesa_load_tracked_matrices(ctx); /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); if (vp->use_const_buffer) { /* Load the subset of push constants that will get used when * we also have a pull constant buffer. */ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { if (brw->vs.constant_map[i] != -1) { assert(brw->vs.constant_map[i] <= nr); memcpy(buf + offset + brw->vs.constant_map[i] * 4, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } } } else { for (i = 0; i < nr; i++) { memcpy(buf + offset + i * 4, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } } } if (0) { for (i = 0; i < sz*16; i+=4) printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.curbe_bo != NULL && brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ free(buf); } else { /* constants have changed */ if (brw->curbe.last_buf) free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (brw->curbe.curbe_bo != NULL && brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) { drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); dri_bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } if (brw->curbe.curbe_bo == NULL) { /* Allocate a single page for CURBE entries for this batchbuffer. * They're generally around 64b. */ brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", 4096, 1 << 6); brw->curbe.curbe_next_offset = 0; drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo); } brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; brw->curbe.curbe_next_offset += bufsz; brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); /* Copy data to the buffer: */ memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset, buf, bufsz); } brw_add_validated_bo(brw, brw->curbe.curbe_bo); /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ }