/**
 * Run an ATI fragment shader for one pixel of a span.
 * NOTE: all arithmetic is done in single-precision floating point.
 *
 * For every pass the texture setup instructions are handled first
 * (handle_pass_op / handle_sample_op), then each arithmetic instruction
 * is evaluated in three steps: load the source arguments, compute the
 * color and alpha results, write the results to the destination register.
 *
 * \param ctx - rendering context
 * \param shader - the shader to execute
 * \param machine - virtual machine state
 * \param span - the SWspan we're operating on
 * \param column - which pixel [i] we're operating on in the span
 */
static void
execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
               struct atifs_machine *machine, const SWspan *span,
               GLuint column)
{
   /* src[op][arg][component]; op 0 is the color op, op 1 the alpha op */
   GLfloat src[2][3][4];
   GLfloat dst[2][4];
   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
   GLint pass;

   for (pass = 0; pass < shader->NumPasses; pass++) {
      GLint reg;
      GLuint pc;

      /* finish_pass() is invoked before every pass except the first */
      if (pass > 0)
         finish_pass(machine);

      /* texture setup instructions, one slot per fragment register */
      for (reg = 0; reg < MAX_NUM_FRAGMENT_REGISTERS_ATI; reg++) {
         struct atifs_setupinst *texinst = &shader->SetupInst[pass][reg];

         if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
            handle_pass_op(machine, texinst, span, column, reg);
         }
         else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
            handle_sample_op(ctx, machine, texinst, span, column, reg);
         }
      }

      /* arithmetic instructions of this pass */
      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
         struct atifs_instruction *inst = &shader->Instructions[pass][pc];
         GLint optype;

         /* Step 1: fetch the arguments of the color and alpha ops */
         for (optype = 0; optype < 2; optype++) {
            GLuint i;

            for (i = 0; i < inst->ArgCount[optype]; i++) {
               const GLint index = inst->SrcReg[optype][i].Index;

               if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI) {
                  SETUP_SRC_REG(optype, i,
                                machine->Registers[index - GL_REG_0_ATI]);
               }
               else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
                  /* a set bit in LocalConstDef selects the shader's own
                   * constant, otherwise the context-wide one is used */
                  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
                     SETUP_SRC_REG(optype, i,
                                   shader->Constants[index - GL_CON_0_ATI]);
                  }
                  else {
                     SETUP_SRC_REG(optype, i,
                                   ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
                  }
               }
               else if (index == GL_ONE) {
                  SETUP_SRC_REG(optype, i, ones);
               }
               else if (index == GL_ZERO) {
                  SETUP_SRC_REG(optype, i, zeros);
               }
               else if (index == GL_PRIMARY_COLOR_EXT) {
                  SETUP_SRC_REG(optype, i,
                                machine->Inputs[ATI_FS_INPUT_PRIMARY]);
               }
               else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
                  SETUP_SRC_REG(optype, i,
                                machine->Inputs[ATI_FS_INPUT_SECONDARY]);
               }

               apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
                             src[optype][i]);
               apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
                             src[optype][i]);
            }
         }

         /* Step 2: evaluate the operations - color first, then alpha */
         for (optype = 0; optype < 2; optype++) {
            /* the color op produces components 0..2, the alpha op only 3 */
            const GLuint first = optype ? 3 : 0;
            const GLuint end = optype ? 4 : 3;
            GLfloat (*arg)[4] = src[optype];
            GLfloat (*carg)[4] = src[0];   /* color-op arguments, for DOTs */
            GLfloat *out = dst[optype];
            GLfloat result;
            GLuint c;

            if (!inst->Opcode[optype])
               continue;

            switch (inst->Opcode[optype]) {
            case GL_ADD_ATI:
               for (c = first; c < end; c++)
                  out[c] = arg[0][c] + arg[1][c];
               break;

            case GL_SUB_ATI:
               for (c = first; c < end; c++)
                  out[c] = arg[0][c] - arg[1][c];
               break;

            case GL_MUL_ATI:
               for (c = first; c < end; c++)
                  out[c] = arg[0][c] * arg[1][c];
               break;

            case GL_MAD_ATI:
               for (c = first; c < end; c++)
                  out[c] = arg[0][c] * arg[1][c] + arg[2][c];
               break;

            case GL_LERP_ATI:
               for (c = first; c < end; c++)
                  out[c] = arg[0][c] * arg[1][c] + (1 - arg[0][c]) * arg[2][c];
               break;

            case GL_MOV_ATI:
               for (c = first; c < end; c++)
                  out[c] = arg[0][c];
               break;

            case GL_CND_ATI:
               for (c = first; c < end; c++)
                  out[c] = (arg[2][c] > 0.5) ? arg[0][c] : arg[1][c];
               break;

            case GL_CND0_ATI:
               for (c = first; c < end; c++)
                  out[c] = (arg[2][c] >= 0) ? arg[0][c] : arg[1][c];
               break;

            case GL_DOT2_ADD_ATI:
               /* DOT 2 always uses the source from the color op */
               /* could save recalculation of dot products for alpha inst */
               result = carg[0][0] * carg[1][0] +
                        carg[0][1] * carg[1][1] + carg[2][2];
               for (c = first; c < end; c++)
                  out[c] = result;
               break;

            case GL_DOT3_ATI:
               /* DOT 3 always uses the source from the color op */
               result = carg[0][0] * carg[1][0] +
                        carg[0][1] * carg[1][1] +
                        carg[0][2] * carg[1][2];
               for (c = first; c < end; c++)
                  out[c] = result;
               break;

            case GL_DOT4_ATI:
               /* DOT 4 always uses the source from the color op */
               result = carg[0][0] * carg[1][0] +
                        carg[0][1] * carg[1][1] +
                        carg[0][2] * carg[1][2] +
                        carg[0][3] * carg[1][3];
               for (c = first; c < end; c++)
                  out[c] = result;
               break;
            }
         }

         /* Step 3: write out the destination registers */
         for (optype = 0; optype < 2; optype++) {
            GLint dstreg;
            GLfloat *dstp;

            if (!inst->Opcode[optype])
               continue;

            dstreg = inst->DstReg[optype].Index;
            dstp = machine->Registers[dstreg - GL_REG_0_ATI];

            if (optype != 0 &&
                (inst->Opcode[1] == GL_DOT2_ADD_ATI ||
                 inst->Opcode[1] == GL_DOT3_ATI ||
                 inst->Opcode[1] == GL_DOT4_ATI)) {
               /* alpha op is a DOT: the alpha result is written with the
                * color op's dstMod and a mask of 0 */
               write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
            }
            else {
               write_dst_addr(optype, inst->DstReg[optype].dstMod,
                              inst->DstReg[optype].dstMask, dst[optype],
                              dstp);
            }
         }
      }
   }
}
/**
 * Execute the given ATI fragment shader for one pixel of a span.
 *
 * Instructions are processed in order.  Pass and sample instructions are
 * forwarded to handle_pass_op() / handle_sample_op(); every other
 * instruction is treated as an arithmetic instruction: its source
 * arguments are loaded, the color and alpha operations are evaluated and
 * the results are written to the destination registers.
 *
 * \param ctx - rendering context
 * \param shader - the shader to execute
 * \param maxInst - not used by this function
 * \param machine - virtual machine state
 * \param span - the span we're operating on
 * \param column - which pixel we're operating on in the span
 * \return always GL_TRUE
 */
static GLboolean
execute_shader(GLcontext * ctx,
               const struct ati_fragment_shader *shader, GLuint maxInst,
               struct atifs_machine *machine, const struct sw_span *span,
               GLuint column)
{
   GLuint pc;
   struct atifs_instruction *inst;
   GLint optype;
   GLint i;
   GLint dstreg;
   /* src[op][arg][component]; op 0 is the color op, op 1 the alpha op */
   GLfloat src[2][3][4];
   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
   GLfloat dst[2][4], *dstp;

   (void) maxInst;   /* kept for interface compatibility */

   for (pc = 0; pc < shader->Base.NumInstructions; pc++) {
      inst = &shader->Instructions[pc];

      if (inst->Opcode[0] == ATI_FRAGMENT_SHADER_PASS_OP) {
         handle_pass_op(machine, inst, span, column);
      }
      else if (inst->Opcode[0] == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
         handle_sample_op(ctx, machine, inst, span, column);
      }
      else {
         /* first arithmetic instruction seen: mark the machine as pass 1 */
         if (machine->pass == 0)
            machine->pass = 1;

         /* setup the source registers for color and alpha ops */
         for (optype = 0; optype < 2; optype++) {
            for (i = 0; i < inst->ArgCount[optype]; i++) {
               GLint index = inst->SrcReg[optype][i].Index;

               if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
                  SETUP_SRC_REG(optype, i,
                                machine->Registers[index - GL_REG_0_ATI]);
               else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI)
                  SETUP_SRC_REG(optype, i,
                                shader->Constants[index - GL_CON_0_ATI]);
               else if (index == GL_ONE)
                  SETUP_SRC_REG(optype, i, ones);
               else if (index == GL_ZERO)
                  SETUP_SRC_REG(optype, i, zeros);
               else if (index == GL_PRIMARY_COLOR_EXT)
                  SETUP_SRC_REG(optype, i,
                                machine->Inputs[ATI_FS_INPUT_PRIMARY]);
               else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
                  SETUP_SRC_REG(optype, i,
                                machine->Inputs[ATI_FS_INPUT_SECONDARY]);

               apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
                             src[optype][i]);
               apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
                             src[optype][i]);
            }
         }

         /* Execute the operations - color then alpha.
          * The color op writes components 0..2, the alpha op component 3.
          */
         for (optype = 0; optype < 2; optype++) {
            if (inst->Opcode[optype]) {
               switch (inst->Opcode[optype]) {
               case GL_ADD_ATI:
                  if (!optype) {
                     for (i = 0; i < 3; i++) {
                        dst[optype][i] =
                           src[optype][0][i] + src[optype][1][i];
                     }
                  }
                  else {
                     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
                  }
                  break;

               case GL_SUB_ATI:
                  if (!optype) {
                     for (i = 0; i < 3; i++) {
                        dst[optype][i] =
                           src[optype][0][i] - src[optype][1][i];
                     }
                  }
                  else {
                     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
                  }
                  break;

               case GL_MUL_ATI:
                  if (!optype) {
                     for (i = 0; i < 3; i++) {
                        dst[optype][i] =
                           src[optype][0][i] * src[optype][1][i];
                     }
                  }
                  else {
                     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
                  }
                  break;

               case GL_MAD_ATI:
                  if (!optype) {
                     for (i = 0; i < 3; i++) {
                        dst[optype][i] =
                           src[optype][0][i] * src[optype][1][i] +
                           src[optype][2][i];
                     }
                  }
                  else {
                     dst[optype][3] =
                        src[optype][0][3] * src[optype][1][3] +
                        src[optype][2][3];
                  }
                  break;

               case GL_LERP_ATI:
                  if (!optype) {
                     for (i = 0; i < 3; i++) {
                        dst[optype][i] =
                           src[optype][0][i] * src[optype][1][i] +
                           (1 - src[optype][0][i]) * src[optype][2][i];
                     }
                  }
                  else {
                     dst[optype][3] =
                        src[optype][0][3] * src[optype][1][3] +
                        (1 - src[optype][0][3]) * src[optype][2][3];
                  }
                  break;

               case GL_MOV_ATI:
                  if (!optype) {
                     for (i = 0; i < 3; i++) {
                        dst[optype][i] = src[optype][0][i];
                     }
                  }
                  else {
                     dst[optype][3] = src[optype][0][3];
                  }
                  break;

               case GL_CND_ATI:
                  if (!optype) {
                     for (i = 0; i < 3; i++) {
                        dst[optype][i] =
                           (src[optype][2][i] > 0.5) ? src[optype][0][i] :
                           src[optype][1][i];
                     }
                  }
                  else {
                     dst[optype][3] =
                        (src[optype][2][3] > 0.5) ? src[optype][0][3] :
                        src[optype][1][3];
                  }
                  break;

               case GL_CND0_ATI:
                  if (!optype) {
                     for (i = 0; i < 3; i++) {
                        dst[optype][i] =
                           (src[optype][2][i] >= 0) ? src[optype][0][i] :
                           src[optype][1][i];
                     }
                  }
                  else {
                     dst[optype][3] =
                        (src[optype][2][3] >= 0) ? src[optype][0][3] :
                        src[optype][1][3];
                  }
                  break;

               case GL_DOT2_ADD_ATI:
                  {
                     GLfloat result;

                     /* DOT 2 always uses the source from the color op */
                     result = src[0][0][0] * src[0][1][0] +
                        src[0][0][1] * src[0][1][1] + src[0][2][2];
                     if (!optype) {
                        for (i = 0; i < 3; i++) {
                           dst[optype][i] = result;
                        }
                     }
                     else {
                        dst[optype][3] = result;
                     }
                  }
                  break;

               case GL_DOT3_ATI:
                  {
                     GLfloat result;

                     /* DOT 3 always uses the source from the color op */
                     result = src[0][0][0] * src[0][1][0] +
                        src[0][0][1] * src[0][1][1] +
                        src[0][0][2] * src[0][1][2];
                     if (!optype) {
                        for (i = 0; i < 3; i++) {
                           dst[optype][i] = result;
                        }
                     }
                     else {
                        dst[optype][3] = result;
                     }
                  }
                  break;

               case GL_DOT4_ATI:
                  {
                     GLfloat result;

                     /* DOT 4 always uses the source from the color op.
                      * Every term must read src[0]: src[1] is only filled
                      * for the alpha op's own ArgCount[1] arguments, so
                      * indexing by optype here would mix in an alpha-op
                      * (possibly unset) value when optype == 1.
                      */
                     result = src[0][0][0] * src[0][1][0] +
                        src[0][0][1] * src[0][1][1] +
                        src[0][0][2] * src[0][1][2] +
                        src[0][0][3] * src[0][1][3];
                     if (!optype) {
                        for (i = 0; i < 3; i++) {
                           dst[optype][i] = result;
                        }
                     }
                     else {
                        dst[optype][3] = result;
                     }
                  }
                  break;
               }
            }
         }

         /* write out the destination registers */
         for (optype = 0; optype < 2; optype++) {
            if (inst->Opcode[optype]) {
               dstreg = inst->DstReg[optype].Index;
               dstp = machine->Registers[dstreg - GL_REG_0_ATI];

               write_dst_addr(optype, inst->DstReg[optype].dstMod,
                              inst->DstReg[optype].dstMask, dst[optype],
                              dstp);
            }
         }
      }
   }
   return GL_TRUE;
}