uint i915_emit_const1f(struct i915_fp_compile * p, float c0) { struct i915_fragment_shader *ifs = p->shader; unsigned reg, idx; if (c0 == 0.0) return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); if (c0 == 1.0) return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) continue; for (idx = 0; idx < 4; idx++) { if (!(ifs->constant_flags[reg] & (1 << idx)) || ifs->constants[reg][idx] == c0) { ifs->constants[reg][idx] = c0; ifs->constant_flags[reg] |= 1 << idx; if (reg + 1 > ifs->num_constants) ifs->num_constants = reg + 1; return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); } } } i915_program_error(p, "i915_emit_const1f: out of constants"); return 0; }
/**
 * Obtain a ureg whose X channel reads the scalar constant c0.
 *
 * 0.0 and 1.0 are produced with the ZERO/ONE swizzle selectors on R0 and
 * need no storage.  Otherwise the value goes into the first channel that
 * is free or already equal to c0, skipping registers whose flags equal
 * I915_CONSTFLAG_PARAM.
 *
 * \return the ureg, or 0 (with p->error set and a message on stderr) when
 *         no channel is available.
 */
GLuint
i915_emit_const1f(struct i915_fragment_program * p, GLfloat c0)
{
   GLint r, chan;

   if (c0 == 0.0)
      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
   if (c0 == 1.0)
      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);

   for (r = 0; r < I915_MAX_CONSTANT; r++) {
      if (p->constant_flags[r] == I915_CONSTFLAG_PARAM)
         continue;

      for (chan = 0; chan < 4; chan++) {
         /* Occupied by a different value: try the next channel. */
         if ((p->constant_flags[r] & (1 << chan)) &&
             !(p->constant[r][chan] == c0))
            continue;

         p->constant[r][chan] = c0;
         p->constant_flags[r] |= 1 << chan;

         /* Keep nr_constants covering the highest register in use. */
         if (r + 1 > p->nr_constants)
            p->nr_constants = r + 1;

         return swizzle(UREG(REG_TYPE_CONST, r), chan, ZERO, ZERO, ONE);
      }
   }

   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
   p->error = 1;
   return 0;
}
void updatePolyhedron(const Vec3& current) { const Grid& grid = m_tool->grid(); const Math::Axis::Type axis = m_plane.normal.firstComponent(); const Plane3 swizzledPlane(swizzle(m_plane.anchor(), axis), swizzle(m_plane.normal, axis)); const Vec3 theMin = swizzle(grid.snapDown(min(m_initialPoint, current)), axis); const Vec3 theMax = swizzle(grid.snapUp (max(m_initialPoint, current)), axis); const Vec2 topLeft2(theMin.x(), theMin.y()); const Vec2 topRight2(theMax.x(), theMin.y()); const Vec2 bottomLeft2(theMin.x(), theMax.y()); const Vec2 bottomRight2(theMax.x(), theMax.y()); const Vec3 topLeft3 = unswizzle(Vec3(topLeft2, swizzledPlane.zAt(topLeft2)), axis); const Vec3 topRight3 = unswizzle(Vec3(topRight2, swizzledPlane.zAt(topRight2)), axis); const Vec3 bottomLeft3 = unswizzle(Vec3(bottomLeft2, swizzledPlane.zAt(bottomLeft2)), axis); const Vec3 bottomRight3 = unswizzle(Vec3(bottomRight2, swizzledPlane.zAt(bottomRight2)), axis); Polyhedron3 polyhedron = m_oldPolyhedron; polyhedron.addPoint(topLeft3); polyhedron.addPoint(bottomLeft3); polyhedron.addPoint(bottomRight3); polyhedron.addPoint(topRight3); m_tool->update(polyhedron); }
/** * [1, src0.y*src1.y, src0.z, src1.w] * So basically MUL with lotsa swizzling. */ static void transform_DST(struct radeon_compiler* c, struct rc_instruction* inst) { emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); rc_remove_instruction(inst); }
static void transform_XPD(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); }
void vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst, unsigned base_offset, unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; /* Set up the message header to reference the proper parts of the URB */ dst_reg header = dst_reg(this, glsl_type::uvec4_type); inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header, brw_imm_ud(dst.writemask << first_component), indirect_offset); inst->force_writemask_all = true; vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header)); read->offset = base_offset; read->mlen = 1; read->base_mrf = -1; if (first_component) { /* Read into a temporary and copy with a swizzle and writemask. */ read->dst = retype(dst_reg(this, glsl_type::ivec4_type), dst.type); emit(MOV(dst, swizzle(src_reg(read->dst), BRW_SWZ_COMP_INPUT(first_component)))); } }
void vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst, const src_reg &vertex_index, unsigned base_offset, unsigned first_component, const src_reg &indirect_offset) { vec4_instruction *inst; dst_reg temp(this, glsl_type::ivec4_type); temp.type = dst.type; /* Set up the message header to reference the proper parts of the URB */ dst_reg header = dst_reg(this, glsl_type::uvec4_type); inst = emit(TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index, indirect_offset); inst->force_writemask_all = true; /* Read into a temporary, ignoring writemasking. */ inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); inst->offset = base_offset; inst->mlen = 1; inst->base_mrf = -1; /* Copy the temporary to the destination to deal with writemasking. * * Also attempt to deal with gl_PointSize being in the .w component. */ if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW))); } else { src_reg src = src_reg(temp); src.swizzle = BRW_SWZ_COMP_INPUT(first_component); emit(MOV(dst, src)); } }
/**
 * Small exercise of VecN arithmetic and swizzle().
 *
 * Twelve floats are viewed as two VecN<float, 6> values.  For each of them
 * the output is first set to in + in and then swizzle() is applied with
 * the index tables {1, 0} / {0, 1}.  All twelve output floats are printed.
 * Always returns 0; the Lua state is not used.
 */
int test(lua_State *L) {
	float data[12] = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.};
	float out_data[12];

	VecN<float, 6> *src = (VecN<float, 6> *)data;
	VecN<float, 6> *dst = (VecN<float, 6> *)out_data;

	for(int block = 0; block < 2; block++, src++, dst++) {
		int oo[] = {1, 0};
		int ii[] = {0, 1};

		MAT(dst) = MAT(src)+MAT(src);
		swizzle(MAT(dst), MAT(src), 2, oo, ii);
	}

	for(int k = 0; k < 12; k++) {
		printf("j: %f\n", out_data[k]);
	}

	/*
	Vec3<char> v1c(64, 123, 5);
	v1c = v1c*0.5;
	printf("t: %d %d %d\n", v1c.x, v1c.y, v1c.z);
	*/

	return 0;
}
/**
 * Thread entry point: repeatedly calls swizzle() on a heap-allocated
 * counter until it reaches MAXVAL, sleeping a short pseudo-random time
 * between calls.
 *
 * The counter is handed to the joiner through pthread_exit(); the joiner
 * owns (and should free) it.  If the allocation fails the thread exits
 * with NULL instead of dereferencing a null pointer.
 *
 * \param arg  unused.
 */
void *accessorThread(void *arg){
  (void)arg;

  int *result = (int*)malloc(sizeof(int));
  if (result == NULL) {
    /* Nothing to count with; report the failure to the joiner. */
    pthread_exit(NULL);
  }

  *result = 0;
  while(*result < MAXVAL){
    swizzle(result);
    usleep(10 + (rand() % 100) );
  }
  pthread_exit(result);
}
/**
 * Thread entry point (KLEE variant): the heap-allocated counter is marked
 * symbolic under the name "result", reset to 0, and then passed to
 * swizzle() until it reaches MAXVAL, sleeping a short pseudo-random time
 * between calls.
 *
 * The counter is handed to the joiner through pthread_exit(); the joiner
 * owns (and should free) it.  If the allocation fails the thread exits
 * with NULL rather than passing a null pointer to klee_make_symbolic()
 * and dereferencing it.
 *
 * \param arg  unused.
 */
void *accessorThread(void *arg){
  (void)arg;

  int *result = (int*)malloc(sizeof(int));
  if (result == NULL) {
    /* Nothing to count with; report the failure to the joiner. */
    pthread_exit(NULL);
  }

  klee_make_symbolic(result, sizeof(int), "result");
  *result = 0;
  while(*result < MAXVAL){
    swizzle(result);
    usleep(10 + (rand() % 100) );
  }
  pthread_exit(result);
}
/* Rather than trying to intercept and jiggle depth writes during * emit, just move the value into its correct position at the end of * the program: */ static void fixup_depth_write(struct i915_fragment_program *p) { if (p->depth_written) { GLuint depth = UREG(REG_TYPE_OD, 0); i915_emit_arith(p, A0_MOV, depth, A0_DEST_CHANNEL_W, 0, swizzle(depth, X, Y, Z, Z), 0, 0); } }
/**
 * Produce the ureg for one texture-combine argument.
 *
 * The raw source comes from get_source(); `operand` then selects how it is
 * read:
 *   - GL_SRC_COLOR (and anything unrecognised): the source unchanged.
 *   - GL_SRC_ALPHA: the source with .w replicated, or unchanged when only
 *     the W channel is being written.
 *   - GL_ONE_MINUS_SRC_COLOR / GL_ONE_MINUS_SRC_ALPHA: a fresh temporary
 *     holding 1.0 + (-value), written under `mask`.
 */
static GLuint emit_combine_source( struct i915_fragment_program *p,
				   GLuint mask,
				   GLuint unit,
				   GLenum source,
				   GLenum operand )
{
   GLuint src = get_source(p, source, unit);
   GLuint subtrahend;
   GLuint tmp;

   switch (operand) {
   case GL_ONE_MINUS_SRC_COLOR:
      subtrahend = src;
      break;

   case GL_ONE_MINUS_SRC_ALPHA:
      subtrahend = swizzle(src, W, W, W, W);
      break;

   case GL_SRC_ALPHA:
      /* Writing only W already reads the alpha channel. */
      if (mask != A0_DEST_CHANNEL_W)
	 return swizzle( src, W, W, W, W );
      return src;

   case GL_SRC_COLOR:
   default:
      return src;
   }

   /* Get unused tmp,
    * Emit tmp = 1.0 + (-subtrahend)
    */
   tmp = i915_get_temp( p );
   return i915_emit_arith( p, A0_ADD, tmp, mask, 0,
			   swizzle(src, ONE, ONE, ONE, ONE ),
			   negate(subtrahend, 1,1,1,1), 0);
}
uint i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) { struct i915_fragment_shader *ifs = p->shader; unsigned reg, idx; if (c0 == 0.0) return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); if (c0 == 1.0) return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); if (c1 == 0.0) return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); if (c1 == 1.0) return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); // XXX emit swizzle here for 0, 1, -1 and any combination thereof // we can use swizzle + neg for that for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (ifs->constant_flags[reg] == 0xf || ifs->constant_flags[reg] == I915_CONSTFLAG_USER) continue; for (idx = 0; idx < 3; idx++) { if (!(ifs->constant_flags[reg] & (3 << idx))) { ifs->constants[reg][idx + 0] = c0; ifs->constants[reg][idx + 1] = c1; ifs->constant_flags[reg] |= 3 << idx; if (reg + 1 > ifs->num_constants) ifs->num_constants = reg + 1; return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); } } } i915_program_error(p, "i915_emit_const2f: out of constants"); return 0; }
/**
 * Obtain a ureg whose X and Y channels read the constants c0 and c1.
 *
 * A 0.0 or 1.0 in either position is supplied by the ZERO/ONE swizzle
 * selector and the remaining value goes through i915_emit_const1f().
 * Otherwise the pair is stored in the first two adjacent unused channels
 * of a constant register that is neither full (flags 0xf) nor flagged
 * I915_CONSTFLAG_PARAM.
 *
 * \return the ureg, or 0 (with p->error set and a message on stderr) when
 *         no pair of adjacent channels is free.
 */
GLuint
i915_emit_const2f(struct i915_fragment_program * p, GLfloat c0, GLfloat c1)
{
   GLint r, first;

   if (c0 == 0.0)
      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
   if (c0 == 1.0)
      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);

   if (c1 == 0.0)
      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
   if (c1 == 1.0)
      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);

   for (r = 0; r < I915_MAX_CONSTANT; r++) {
      if (p->constant_flags[r] == 0xf ||
          p->constant_flags[r] == I915_CONSTFLAG_PARAM)
         continue;

      for (first = 0; first < 3; first++) {
         /* Both channels of the pair must be unused. */
         if (p->constant_flags[r] & (3 << first))
            continue;

         p->constant[r][first] = c0;
         p->constant[r][first + 1] = c1;
         p->constant_flags[r] |= 3 << first;

         if (r + 1 > p->nr_constants)
            p->nr_constants = r + 1;

         return swizzle(UREG(REG_TYPE_CONST, r), first, first + 1, ZERO, ONE);
      }
   }

   fprintf(stderr, "%s: out of constants\n", __func__);
   p->error = 1;
   return 0;
}
/** * Rather than trying to intercept and jiggle depth writes during * emit, just move the value into its correct position at the end of * the program: */ static void i915_fixup_depth_write(struct i915_fp_compile *p) { /* XXX assuming pos/depth is always in output[0] */ if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { const uint depth = UREG(REG_TYPE_OD, 0); i915_emit_arith(p, A0_MOV, /* opcode */ depth, /* dest reg */ A0_DEST_CHANNEL_W, /* write mask */ 0, /* saturate? */ swizzle(depth, X, Y, Z, Z), /* src0 */ 0, 0 /* src1, src2 */); } }
static void emit_program_fini( struct i915_fragment_program *p ) { int cf = get_source( p, GL_PREVIOUS, 0 ); int out = UREG( REG_TYPE_OC, 0 ); if (p->ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { /* Emit specular add. */ GLuint s = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_ALL); i915_emit_arith( p, A0_ADD, out, A0_DEST_CHANNEL_ALL, 0, cf, swizzle(s, X,Y,Z,ZERO), 0 ); } else if (cf != out) { /* Will wind up in here if no texture enabled or a couple of * other scenarios (GL_REPLACE for instance). */ i915_emit_arith( p, A0_MOV, out, A0_DEST_CHANNEL_ALL, 0, cf, 0, 0 ); } }
/**
 * Build the ureg for a program source register.
 *
 * The register file and index are packed with UREG(), the four swizzle
 * selectors are applied (offset from _X), and all four channels are
 * negated when NegateBase is set.
 */
static GLuint src_vector( const struct fp_src_register *source )
{
   GLuint ureg;

   assert(source->Index < 32);	/* limitation of UREG representation */

   ureg = swizzle(UREG( src_reg_file( source->File ), source->Index ),
		  _X + source->Swizzle[0],
		  _X + source->Swizzle[1],
		  _X + source->Swizzle[2],
		  _X + source->Swizzle[3]);

   if (!source->NegateBase)
      return ureg;

   return negate( ureg, 1,1,1,1 );
}
/* Detach block b from free list `listno` of arena a.  No sanity checking
   is done on b itself.  Afterwards b's prev/next links are NULL. */
static
void unlinkBlock ( Arena* a, UInt* b, Int listno )
{
   UInt* prev_blk = get_prev_p(b);

   vg_assert(listno >= 0 && listno < VG_N_MALLOC_LISTS);

   if (prev_blk != b) {
      /* General case: splice b out, let the list head point at its
         predecessor, then rotate the list with swizzle(). */
      UInt* next_blk = get_next_p(b);
      a->freelist[listno] = prev_blk;
      set_next_p(prev_blk, next_blk);
      set_prev_p(next_blk, prev_blk);
      swizzle ( a, listno );
   } else {
      /* b links to itself: it is the only element, so the list
         becomes empty. */
      vg_assert(get_next_p(b) == b);
      a->freelist[listno] = NULL;
   }

   set_prev_p(b, NULL);
   set_next_p(b, NULL);
}
/**
 * Emit the instructions for sphere-map texture coordinate generation,
 * writing the components selected by `writemask` of `dest`.
 *
 * With n the transformed normal and u the normalized eye position:
 *   r    = u - 2(n.u)n
 *   m'   = 1/sqrt(rx^2 + ry^2 + (rz+1)^2) * 0.5      (i.e. 1/m)
 *   dest = r * m' + 0.5
 */
static void build_sphere_texgen( struct tnl_program *p,
				 struct ureg dest,
				 GLuint writemask )
{
   struct ureg n = get_transformed_normal(p);
   struct ureg u = get_eye_position_normalized(p);
   struct ureg scratch = get_temp(p);
   struct ureg one_half = register_scalar_const(p, .5);
   struct ureg refl = get_temp(p);
   struct ureg recip_m = get_temp(p);
   struct ureg ident = get_identity_param(p);

   /* Could share the above calculations, but it would be
    * a fairly odd state for someone to set (both sphere and
    * reflection active for different texture coordinate
    * components.  Of course - if two texture units enable
    * reflect and/or sphere, things start to tilt in favour
    * of separating this out:
    */

   /* scratch = n.u */
   emit_op2(p, OPCODE_DP3, scratch, 0, n, u);
   /* scratch = 2n.u */
   emit_op2(p, OPCODE_ADD, scratch, 0, scratch, scratch);
   /* refl = (-2n.u)n + u */
   emit_op3(p, OPCODE_MAD, refl, 0, negate(scratch), n, u);
   /* scratch = refl + 0,0,1 (identity param with z and w swapped) */
   emit_op2(p, OPCODE_ADD, scratch, 0, refl, swizzle(ident,X,Y,W,Z));
   /* scratch = rx^2 + ry^2 + (rz+1)^2 */
   emit_op2(p, OPCODE_DP3, scratch, 0, scratch, scratch);
   /* scratch = 2/m */
   emit_op1(p, OPCODE_RSQ, scratch, 0, scratch);
   /* recip_m = 1/m */
   emit_op2(p, OPCODE_MUL, recip_m, 0, scratch, one_half);
   /* dest = refl/m + 1/2 */
   emit_op3(p, OPCODE_MAD, dest, writemask, refl, recip_m, one_half);

   release_temp(p, scratch);
   release_temp(p, refl);
   release_temp(p, recip_m);
}
/**
 * Convenience wrapper: swizzle `a` with SWIZZLE_WWWW and a component
 * count of 1.
 */
ir_swizzle *
swizzle_w(operand a)
{
   ir_swizzle *w_only = swizzle(a, SWIZZLE_WWWW, 1);

   return w_only;
}
/** * Retrieve a ureg for the given source register. Will emit * constants, apply swizzling and negation as needed. */ static GLuint src_vector(struct i915_fragment_program *p, const struct prog_src_register *source, const struct gl_fragment_program *program) { GLuint src; switch (source->File) { /* Registers: */ case PROGRAM_TEMPORARY: if (source->Index >= I915_MAX_TEMPORARY) { i915_program_error(p, "Exceeded max temporary reg"); return 0; } src = UREG(REG_TYPE_R, source->Index); break; case PROGRAM_INPUT: switch (source->Index) { case FRAG_ATTRIB_WPOS: src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); break; case FRAG_ATTRIB_COL0: src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); break; case FRAG_ATTRIB_COL1: src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); src = swizzle(src, X, Y, Z, ONE); break; case FRAG_ATTRIB_FOGC: src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); src = swizzle(src, W, ZERO, ZERO, ONE); break; case FRAG_ATTRIB_TEX0: case FRAG_ATTRIB_TEX1: case FRAG_ATTRIB_TEX2: case FRAG_ATTRIB_TEX3: case FRAG_ATTRIB_TEX4: case FRAG_ATTRIB_TEX5: case FRAG_ATTRIB_TEX6: case FRAG_ATTRIB_TEX7: src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), D0_CHANNEL_ALL); break; default: i915_program_error(p, "Bad source->Index"); return 0; } break; /* Various paramters and env values. All emitted to * hardware as program constants. 
*/ case PROGRAM_LOCAL_PARAM: src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]); break; case PROGRAM_ENV_PARAM: src = i915_emit_param4fv(p, p->ctx->FragmentProgram.Parameters[source-> Index]); break; case PROGRAM_CONSTANT: case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: src = i915_emit_param4fv(p, program->Base.Parameters->ParameterValues[source-> Index]); break; default: i915_program_error(p, "Bad source->File"); return 0; } src = swizzle(src, GET_SWZ(source->Swizzle, 0), GET_SWZ(source->Swizzle, 1), GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); if (source->NegateBase) src = negate(src, GET_BIT(source->NegateBase, 0), GET_BIT(source->NegateBase, 1), GET_BIT(source->NegateBase, 2), GET_BIT(source->NegateBase, 3)); return src; }
/* Possible concerns: * * SIN, COS -- could use another taylor step? * LIT -- results seem a little different to sw mesa * LOG -- different to mesa on negative numbers, but this is conformant. * * Parse failures -- Mesa doesn't currently give a good indication * internally whether a particular program string parsed or not. This * can lead to confusion -- hopefully we cope with it ok now. * */ static void upload_program(struct i915_fragment_program *p) { const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; const struct prog_instruction *inst = program->Base.Instructions; /* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */ /* Is this a parse-failed program? Ensure a valid program is * loaded, as the flagging of an error isn't sufficient to stop * this being uploaded to hardware. */ if (inst[0].Opcode == OPCODE_END) { GLuint tmp = i915_get_utemp(p); i915_emit_arith(p, A0_MOV, UREG(REG_TYPE_OC, 0), A0_DEST_CHANNEL_ALL, 0, swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0); return; } if (program->Base.NumInstructions > I915_MAX_INSN) { i915_program_error( p, "Exceeded max instructions" ); return; } /* Not always needed: */ calc_live_regs(p); while (1) { GLuint src0, src1, src2, flags; GLuint tmp = 0, consts0 = 0, consts1 = 0; switch (inst->Opcode) { case OPCODE_ABS: src0 = src_vector(p, &inst->SrcReg[0], program); i915_emit_arith(p, A0_MAX, get_result_vector(p, inst), get_result_flags(inst), 0, src0, negate(src0, 1, 1, 1, 1), 0); break; case OPCODE_ADD: EMIT_2ARG_ARITH(A0_ADD); break; case OPCODE_CMP: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); src2 = src_vector(p, &inst->SrcReg[2], program); i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ break; case OPCODE_COS: src0 = src_vector(p, &inst->SrcReg[0], program); tmp = i915_get_utemp(p); consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); consts1 = 
i915_emit_const4fv(p, sin_quad_constants[1]); /* Reduce range from repeating about [-pi,pi] to [-1,1] */ i915_emit_arith(p, A0_MAD, tmp, A0_DEST_CHANNEL_X, 0, src0, swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */ i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); i915_emit_arith(p, A0_MAD, tmp, A0_DEST_CHANNEL_X, 0, tmp, swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ /* Compute COS with the same calculation used for SIN, but a * different source range has been mapped to [-1,1] this time. */ /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, X, ZERO, ZERO), negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, X, ZERO, ZERO), tmp, 0); /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ i915_emit_arith(p, A0_DP3, tmp, A0_DEST_CHANNEL_X, 0, tmp, swizzle(consts1, X, Y, ZERO, ZERO), 0); /* tmp.x now contains a first approximation (y). Now, weight it * against tmp.y**2 to get closer. 
*/ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, X, ZERO, ZERO), negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ i915_emit_arith(p, A0_MAD, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, X, ZERO, ZERO), swizzle(tmp, ZERO, Y, ZERO, ZERO), negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ i915_emit_arith(p, A0_MAD, get_result_vector(p, inst), get_result_flags(inst), 0, swizzle(consts1, W, W, W, W), swizzle(tmp, Y, Y, Y, Y), swizzle(tmp, X, X, X, X)); break; case OPCODE_DP3: EMIT_2ARG_ARITH(A0_DP3); break; case OPCODE_DP4: EMIT_2ARG_ARITH(A0_DP4); break; case OPCODE_DPH: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); i915_emit_arith(p, A0_DP4, get_result_vector(p, inst), get_result_flags(inst), 0, swizzle(src0, X, Y, Z, ONE), src1, 0); break; case OPCODE_DST: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); /* result[0] = 1 * 1; * result[1] = a[1] * b[1]; * result[2] = a[2] * 1; * result[3] = 1 * b[3]; */ i915_emit_arith(p, A0_MUL, get_result_vector(p, inst), get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE), swizzle(src1, ONE, Y, ONE, W), 0); break; case OPCODE_EX2: src0 = src_vector(p, &inst->SrcReg[0], program); i915_emit_arith(p, A0_EXP, get_result_vector(p, inst), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case OPCODE_FLR: EMIT_1ARG_ARITH(A0_FLR); break; case OPCODE_FRC: EMIT_1ARG_ARITH(A0_FRC); break; case OPCODE_KIL: src0 = src_vector(p, &inst->SrcReg[0], program); tmp = i915_get_utemp(p); i915_emit_texld(p, get_live_regs(p, inst), tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ 0, src0, T0_TEXKILL); break; case OPCODE_LG2: src0 = src_vector(p, &inst->SrcReg[0], program); i915_emit_arith(p, A0_LOG, get_result_vector(p, inst), get_result_flags(inst), 0, 
swizzle(src0, X, X, X, X), 0, 0); break; case OPCODE_LIT: src0 = src_vector(p, &inst->SrcReg[0], program); tmp = i915_get_utemp(p); /* tmp = max( a.xyzw, a.00zw ) * XXX: Clamp tmp.w to -128..128 * tmp.y = log(tmp.y) * tmp.y = tmp.w * tmp.y * tmp.y = exp(tmp.y) * result = cmp (a.11-x1, a.1x01, a.1xy1 ) */ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0, swizzle(src0, ZERO, ZERO, Z, W), 0); i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, Y, Y, Y, Y), 0, 0); i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, Y, ZERO, ZERO), swizzle(tmp, ZERO, W, ZERO, ZERO), 0); i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, Y, Y, Y, Y), 0, 0); i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE)); break; case OPCODE_LRP: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); src2 = src_vector(p, &inst->SrcReg[2], program); flags = get_result_flags(inst); tmp = i915_get_utemp(p); /* b*a + c*(1-a) * * b*a + c - ca * * tmp = b*a + c, * result = (-c)*a + tmp */ i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); i915_emit_arith(p, A0_MAD, get_result_vector(p, inst), flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); break; case OPCODE_MAD: EMIT_3ARG_ARITH(A0_MAD); break; case OPCODE_MAX: EMIT_2ARG_ARITH(A0_MAX); break; case OPCODE_MIN: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); tmp = i915_get_utemp(p); flags = get_result_flags(inst); i915_emit_arith(p, A0_MAX, tmp, flags & A0_DEST_CHANNEL_ALL, 0, negate(src0, 1, 1, 1, 1), negate(src1, 1, 1, 1, 1), 0); i915_emit_arith(p, A0_MOV, get_result_vector(p, inst), flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); break; case OPCODE_MOV: EMIT_1ARG_ARITH(A0_MOV); break; case OPCODE_MUL: EMIT_2ARG_ARITH(A0_MUL); break; 
case OPCODE_POW: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); tmp = i915_get_utemp(p); flags = get_result_flags(inst); /* XXX: masking on intermediate values, here and elsewhere. */ i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0, swizzle(src0, X, X, X, X), 0, 0); i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); i915_emit_arith(p, A0_EXP, get_result_vector(p, inst), flags, 0, swizzle(tmp, X, X, X, X), 0, 0); break; case OPCODE_RCP: src0 = src_vector(p, &inst->SrcReg[0], program); i915_emit_arith(p, A0_RCP, get_result_vector(p, inst), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case OPCODE_RSQ: src0 = src_vector(p, &inst->SrcReg[0], program); i915_emit_arith(p, A0_RSQ, get_result_vector(p, inst), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case OPCODE_SCS: src0 = src_vector(p, &inst->SrcReg[0], program); tmp = i915_get_utemp(p); /* * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x * scs.x = DP4 t1, sin_constants * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 * scs.y = DP4 t1, cos_constants */ i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XY, 0, swizzle(src0, X, X, ONE, ONE), swizzle(src0, X, ONE, ONE, ONE), 0); i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_ALL, 0, swizzle(tmp, X, Y, X, Y), swizzle(tmp, X, X, ONE, ONE), 0); if (inst->DstReg.WriteMask & WRITEMASK_Y) { GLuint tmp1; if (inst->DstReg.WriteMask & WRITEMASK_X) tmp1 = i915_get_utemp(p); else tmp1 = tmp; i915_emit_arith(p, A0_MUL, tmp1, A0_DEST_CHANNEL_ALL, 0, swizzle(tmp, X, Y, Y, W), swizzle(tmp, X, Z, ONE, ONE), 0); i915_emit_arith(p, A0_DP4, get_result_vector(p, inst), A0_DEST_CHANNEL_Y, 0, swizzle(tmp1, W, Z, Y, X), i915_emit_const4fv(p, sin_constants), 0); } if (inst->DstReg.WriteMask & WRITEMASK_X) { i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XYZ, 0, swizzle(tmp, X, X, Z, ONE), swizzle(tmp, 
Z, ONE, ONE, ONE), 0); i915_emit_arith(p, A0_DP4, get_result_vector(p, inst), A0_DEST_CHANNEL_X, 0, swizzle(tmp, ONE, Z, Y, X), i915_emit_const4fv(p, cos_constants), 0); } break; case OPCODE_SGE: EMIT_2ARG_ARITH(A0_SGE); break; case OPCODE_SIN: src0 = src_vector(p, &inst->SrcReg[0], program); tmp = i915_get_utemp(p); consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); /* Reduce range from repeating about [-pi,pi] to [-1,1] */ i915_emit_arith(p, A0_MAD, tmp, A0_DEST_CHANNEL_X, 0, src0, swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */ i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); i915_emit_arith(p, A0_MAD, tmp, A0_DEST_CHANNEL_X, 0, tmp, swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ /* Compute sin using a quadratic and quartic. It gives continuity * that repeating the Taylor series lacks every 2*pi, and has * reduced error. * * The idea was described at: * http://www.devmaster.net/forums/showthread.php?t=5784 */ /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, X, ZERO, ZERO), negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, X, ZERO, ZERO), tmp, 0); /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ i915_emit_arith(p, A0_DP3, tmp, A0_DEST_CHANNEL_X, 0, tmp, swizzle(consts1, X, Y, ZERO, ZERO), 0); /* tmp.x now contains a first approximation (y). Now, weight it * against tmp.y**2 to get closer. 
*/ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, X, ZERO, ZERO), negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ i915_emit_arith(p, A0_MAD, tmp, A0_DEST_CHANNEL_Y, 0, swizzle(tmp, ZERO, X, ZERO, ZERO), swizzle(tmp, ZERO, Y, ZERO, ZERO), negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ i915_emit_arith(p, A0_MAD, get_result_vector(p, inst), get_result_flags(inst), 0, swizzle(consts1, W, W, W, W), swizzle(tmp, Y, Y, Y, Y), swizzle(tmp, X, X, X, X)); break; case OPCODE_SLT: EMIT_2ARG_ARITH(A0_SLT); break; case OPCODE_SUB: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); i915_emit_arith(p, A0_ADD, get_result_vector(p, inst), get_result_flags(inst), 0, src0, negate(src1, 1, 1, 1, 1), 0); break; case OPCODE_SWZ: EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */ break; case OPCODE_TEX: EMIT_TEX(T0_TEXLD); break; case OPCODE_TXB: EMIT_TEX(T0_TEXLDB); break; case OPCODE_TXP: EMIT_TEX(T0_TEXLDP); break; case OPCODE_XPD: /* Cross product: * result.x = src0.y * src1.z - src0.z * src1.y; * result.y = src0.z * src1.x - src0.x * src1.z; * result.z = src0.x * src1.y - src0.y * src1.x; * result.w = undef; */ src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); tmp = i915_get_utemp(p); i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_ALL, 0, swizzle(src0, Z, X, Y, ONE), swizzle(src1, Y, Z, X, ONE), 0); i915_emit_arith(p, A0_MAD, get_result_vector(p, inst), get_result_flags(inst), 0, swizzle(src0, Y, Z, X, ONE), swizzle(src1, Z, X, Y, ONE), negate(tmp, 1, 1, 1, 0)); break; case OPCODE_END: return; default: i915_program_error(p, "bad opcode"); return; } inst++; i915_release_utemps(p); } }
static bool try_constant_propagate(const struct gen_device_info *devinfo, vec4_instruction *inst, int arg, const copy_entry *entry) { /* For constant propagation, we only handle the same constant * across all 4 channels. Some day, we should handle the 8-bit * float vector format, which would let us constant propagate * vectors better. * We could be more aggressive here -- some channels might not get used * based on the destination writemask. */ src_reg value = get_copy_value(*entry, brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle, WRITEMASK_XYZW)); if (value.file != IMM) return false; if (value.type == BRW_REGISTER_TYPE_VF) { /* The result of bit-casting the component values of a vector float * cannot in general be represented as an immediate. */ if (inst->src[arg].type != BRW_REGISTER_TYPE_F) return false; } else { value.type = inst->src[arg].type; } if (inst->src[arg].abs) { if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) || !brw_abs_immediate(value.type, &value.as_brw_reg())) { return false; } } if (inst->src[arg].negate) { if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) || !brw_negate_immediate(value.type, &value.as_brw_reg())) { return false; } } value = swizzle(value, inst->src[arg].swizzle); switch (inst->opcode) { case BRW_OPCODE_MOV: case SHADER_OPCODE_BROADCAST: inst->src[arg] = value; return true; case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: if (devinfo->gen < 8) break; /* fallthrough */ case BRW_OPCODE_DP2: case BRW_OPCODE_DP3: case BRW_OPCODE_DP4: case BRW_OPCODE_DPH: case BRW_OPCODE_BFI1: case BRW_OPCODE_ASR: case BRW_OPCODE_SHL: case BRW_OPCODE_SHR: case BRW_OPCODE_SUBB: if (arg == 1) { inst->src[arg] = value; return true; } break; case BRW_OPCODE_MACH: case BRW_OPCODE_MUL: case SHADER_OPCODE_MULH: case BRW_OPCODE_ADD: case BRW_OPCODE_OR: case BRW_OPCODE_AND: case BRW_OPCODE_XOR: case BRW_OPCODE_ADDC: if (arg == 1) { inst->src[arg] = value; return true; } else if (arg == 0 && 
inst->src[1].file != IMM) { /* Fit this constant in by commuting the operands. Exception: we * can't do this for 32-bit integer MUL/MACH because it's asymmetric. */ if ((inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MACH) && (inst->src[1].type == BRW_REGISTER_TYPE_D || inst->src[1].type == BRW_REGISTER_TYPE_UD)) break; inst->src[0] = inst->src[1]; inst->src[1] = value; return true; } break; case GS_OPCODE_SET_WRITE_OFFSET: /* This is just a multiply by a constant with special strides. * The generator will handle immediates in both arguments (generating * a single MOV of the product). So feel free to propagate in src0. */ inst->src[arg] = value; return true; case BRW_OPCODE_CMP: if (arg == 1) { inst->src[arg] = value; return true; } else if (arg == 0 && inst->src[1].file != IMM) { enum brw_conditional_mod new_cmod; new_cmod = brw_swap_cmod(inst->conditional_mod); if (new_cmod != BRW_CONDITIONAL_NONE) { /* Fit this constant in by swapping the operands and * flipping the test. */ inst->src[0] = inst->src[1]; inst->src[1] = value; inst->conditional_mod = new_cmod; return true; } } break; case BRW_OPCODE_SEL: if (arg == 1) { inst->src[arg] = value; return true; } else if (arg == 0 && inst->src[1].file != IMM) { inst->src[0] = inst->src[1]; inst->src[1] = value; /* If this was predicated, flipping operands means * we also need to flip the predicate. */ if (inst->conditional_mod == BRW_CONDITIONAL_NONE) { inst->predicate_inverse = !inst->predicate_inverse; } return true; } break; default: break; } return false; }
static struct ureg swizzle1( struct ureg reg, int x ) { return swizzle(reg, x, x, x, x); }
/* Emit the lighting instructions into the program being built in p.
 *
 * The scene color is first MOVed to the color outputs (COL0, plus COL1
 * when separate specular is on, plus the BFC* outputs when two-sided
 * lighting is on).  Then, for every enabled light, ambient, diffuse and
 * specular contributions are accumulated with LIT/MAD; the last enabled
 * light writes its result straight into the output registers.
 *
 * Need to add some additional parameters to allow lighting in object
 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
 * space lighting.
 */
static void build_lighting( struct tnl_program *p )
{
   const GLboolean twoside = p->state->light_twoside;
   const GLboolean separate = p->state->separate_specular;
   /* nr_lights: number of enabled lights; count: enabled lights seen so far
    * in the main loop (used to detect the last one).
    */
   GLuint nr_lights = 0, count = 0;
   struct ureg normal = get_transformed_normal(p);
   struct ureg lit = get_temp(p);
   struct ureg dots = get_temp(p);
   /* Running front-face (_col*) and back-face (_bfc*) color accumulators. */
   struct ureg _col0 = undef, _col1 = undef;
   struct ureg _bfc0 = undef, _bfc1 = undef;
   GLuint i;

   /*
    * NOTE:
    * dots.x = dot(normal, VPpli)
    * dots.y = dot(normal, halfAngle)
    * dots.z = back.shininess
    * dots.w = front.shininess
    */

   /* Count the enabled lights up front. */
   for (i = 0; i < MAX_LIGHTS; i++)
      if (p->state->unit[i].light_enabled)
         nr_lights++;

   set_material_flags(p);

   /* Front face setup: shininess into dots.w, accumulators seeded. */
   {
      if (!p->state->material_shininess_is_zero) {
         struct ureg shininess = get_material(p, 0, STATE_SHININESS);
         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
         release_temp(p, shininess);
      }

      _col0 = make_temp(p, get_scenecolor(p, 0));
      /* Without separate specular both accumulators are the same register. */
      if (separate)
         _col1 = make_temp(p, get_identity_param(p));
      else
         _col1 = _col0;
   }

   /* Back face setup, mirroring the front face but using dots.z. */
   if (twoside) {
      if (!p->state->material_shininess_is_zero) {
         /* Note that we negate the back-face specular exponent here.
          * The negation will be un-done later in the back-face code below.
          */
         struct ureg shininess = get_material(p, 1, STATE_SHININESS);
         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
                  negate(swizzle1(shininess,X)));
         release_temp(p, shininess);
      }

      _bfc0 = make_temp(p, get_scenecolor(p, 1));
      if (separate)
         _bfc1 = make_temp(p, get_identity_param(p));
      else
         _bfc1 = _bfc0;
   }

   /* If no lights, still need to emit the scenecolor.
    * NOTE(review): a writemask argument of 0 is passed throughout this
    * function -- presumably "no restriction"; confirm against emit_op*.
    */
   {
      struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
      emit_op1(p, OPCODE_MOV, res0, 0, _col0);
   }

   if (separate) {
      struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
      emit_op1(p, OPCODE_MOV, res1, 0, _col1);
   }

   if (twoside) {
      struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
      emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
   }

   if (twoside && separate) {
      struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
      emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
   }

   /* Nothing more to emit when no light is enabled. */
   if (nr_lights == 0) {
      release_temps(p);
      return;
   }

   for (i = 0; i < MAX_LIGHTS; i++) {
      if (p->state->unit[i].light_enabled) {
         /* half, att and VPpli stay undef unless the branches below set
          * them; att in particular is tested with is_undef() later on.
          */
         struct ureg half = undef;
         struct ureg att = undef, VPpli = undef;

         count++;

         if (p->state->unit[i].light_eyepos3_is_zero) {
            /* Can use precomputed constants in this case.
             * Attenuation never applies to infinite lights.
             */
            VPpli = register_param3(p, STATE_INTERNAL,
                                    STATE_LIGHT_POSITION_NORMALIZED, i);

            if (!p->state->material_shininess_is_zero) {
               if (p->state->light_local_viewer) {
                  struct ureg eye_hat = get_eye_position_normalized(p);
                  half = get_temp(p);
                  emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
                  emit_normalize_vec3(p, half, half);
               }
               else {
                  half = register_param3(p, STATE_INTERNAL,
                                         STATE_LIGHT_HALF_VECTOR, i);
               }
            }
         }
         else {
            struct ureg Ppli = register_param3(p, STATE_INTERNAL,
                                               STATE_LIGHT_POSITION, i);
            struct ureg V = get_eye_position(p);
            struct ureg dist = get_temp(p);

            VPpli = get_temp(p);

            /* Calculate VPpli vector
             */
            emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);

            /* Normalize VPpli.  The dist value also used in
             * attenuation below.
             */
            emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
            emit_op1(p, OPCODE_RSQ, dist, 0, dist);
            emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);

            /* Calculate attenuation:
             */
            if (!p->state->unit[i].light_spotcutoff_is_180 ||
                p->state->unit[i].light_attenuated) {
               att = calculate_light_attenuation(p, i, VPpli, dist);
            }

            /* Calculate viewer direction, or use infinite viewer:
             */
            if (!p->state->material_shininess_is_zero) {
               half = get_temp(p);

               if (p->state->light_local_viewer) {
                  struct ureg eye_hat = get_eye_position_normalized(p);
                  emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
               }
               else {
                  struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
                  emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
               }

               emit_normalize_vec3(p, half, half);
            }

            release_temp(p, dist);
         }

         /* Calculate dot products:
          * With zero shininess no half vector was set up, so a single
          * unmasked DP3 is emitted; otherwise X and Y are written
          * separately, leaving the shininess values in Z/W alone.
          */
         if (p->state->material_shininess_is_zero) {
            emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
         }
         else {
            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
         }

         /* Front face lighting:
          */
         {
            struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
            struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
            struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
            /* Destinations and writemasks of the two final MADs below. */
            struct ureg res0, res1;
            GLuint mask0, mask1;

            if (count == nr_lights) {
               /* Last enabled light: write to the output registers. */
               if (separate) {
                  mask0 = WRITEMASK_XYZ;
                  mask1 = WRITEMASK_XYZ;
                  res0 = register_output( p, VERT_RESULT_COL0 );
                  res1 = register_output( p, VERT_RESULT_COL1 );
               }
               else {
                  mask0 = 0;
                  mask1 = WRITEMASK_XYZ;
                  res0 = _col0;
                  res1 = register_output( p, VERT_RESULT_COL0 );
               }
            }
            else {
               /* More lights follow: keep accumulating in the temps. */
               mask0 = 0;
               mask1 = 0;
               res0 = _col0;
               res1 = _col1;
            }

            if (!is_undef(att)) {
               /* light is attenuated by distance */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
               emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
            }
            else if (!p->state->material_shininess_is_zero) {
               /* there's a non-zero specular term */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
            }
            else {
               /* no attenuation, no specular */
               emit_degenerate_lit(p, lit, dots);
               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
            }

            emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
            emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);

            release_temp(p, ambient);
            release_temp(p, diffuse);
            release_temp(p, specular);
         }

         /* Back face lighting:
          */
         if (twoside) {
            struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
            struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
            struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
            struct ureg res0, res1;
            GLuint mask0, mask1;

            /* Same destination selection as the front face, using the
             * BFC outputs and _bfc accumulators.
             */
            if (count == nr_lights) {
               if (separate) {
                  mask0 = WRITEMASK_XYZ;
                  mask1 = WRITEMASK_XYZ;
                  res0 = register_output( p, VERT_RESULT_BFC0 );
                  res1 = register_output( p, VERT_RESULT_BFC1 );
               }
               else {
                  mask0 = 0;
                  mask1 = WRITEMASK_XYZ;
                  res0 = _bfc0;
                  res1 = register_output( p, VERT_RESULT_BFC0 );
               }
            }
            else {
               res0 = _bfc0;
               res1 = _bfc1;
               mask0 = 0;
               mask1 = 0;
            }

            /* For the back face we need to negate the X and Y component
             * dot products.  dots.Z has the negated back-face specular
             * exponent.  We swizzle that into the W position.  This
             * negation makes the back-face specular term positive again.
             */
            dots = negate(swizzle(dots,X,Y,W,Z));

            if (!is_undef(att)) {
               /* light is attenuated by distance */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
               emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
            }
            else if (!p->state->material_shininess_is_zero) {
               /* there's a non-zero specular term */
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
            }
            else {
               /* no attenuation, no specular */
               emit_degenerate_lit(p, lit, dots);
               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
            }

            emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
            emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
            /* restore dots to its original state for subsequent lights
             * by negating and swizzling again.
             */
            dots = negate(swizzle(dots,X,Y,W,Z));

            release_temp(p, ambient);
            release_temp(p, diffuse);
            release_temp(p, specular);
         }

         /* Per-light registers are handed back before the next light. */
         release_temp(p, half);
         release_temp(p, VPpli);
         release_temp(p, att);
      }
   }

   release_temps( p );
}
/** Swizzle \p a with the SWIZZLE_YYYY pattern and a count of 4. */
ir_swizzle *
swizzle_yyyy(operand a)
{
   ir_swizzle *const result = swizzle(a, SWIZZLE_YYYY, 4);

   return result;
}
/** Swizzle \p a with the SWIZZLE_XXXX pattern and a count of 4. */
ir_swizzle *
swizzle_xxxx(operand a)
{
   ir_swizzle *const result = swizzle(a, SWIZZLE_XXXX, 4);

   return result;
}
/* Occlusion query for a single ray against a BVH4i.
 *
 * Walks the tree with an explicit node stack.  As soon as a triangle
 * passes all tests (and, when __INTERSECTION_FILTER__ is defined, the
 * occlusion filter), ray.geomID is set to 0 and the function returns.
 * If the stack runs empty first, the function returns without writing
 * ray.geomID.
 */
void BVH4iIntersector1::occluded(BVH4i* bvh, Ray& ray)
{
  /* near and node stack */
  __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

  /* setup */
  const mic3f rdir16     = rcp_safe(mic3f(ray.dir.x,ray.dir.y,ray.dir.z));
  const mic_f inf        = mic_f(pos_inf);
  const mic_f zero       = mic_f::zero();

  const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
  const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

  /* slot 0 holds a sentinel: popping it ends the traversal below */
  stack_node[0] = BVH4i::invalidNode;
  stack_node[1] = bvh->root;
  size_t sindex = 2;

  const mic_f org_xyz      = loadAOS4to16f(ray.org.x,ray.org.y,ray.org.z);
  const mic_f dir_xyz      = loadAOS4to16f(ray.dir.x,ray.dir.y,ray.dir.z);
  const mic_f rdir_xyz     = loadAOS4to16f(rdir16.x[0],rdir16.y[0],rdir16.z[0]);
  /* precomputed so the slab test is (bound * rdir - org * rdir) */
  const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
  const mic_f min_dist_xyz = broadcast1to16f(&ray.tnear);
  const mic_f max_dist_xyz = broadcast1to16f(&ray.tfar);

  const unsigned int leaf_mask = BVH4I_LEAF_MASK;

  while (1)
  {
    /* pop the next node to process */
    NodeRef curNode = stack_node[sindex-1];
    sindex--;

    /* descend until curNode is a leaf (the sentinel is expected to end
     * this loop too -- presumably it tests as a leaf; confirm) */
    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(curNode.isLeaf(leaf_mask))) break;

      const Node* __restrict__ const node = curNode.node(nodes);
      const float* __restrict const plower = (float*)node->lower;
      const float* __restrict const pupper = (float*)node->upper;

      prefetch<PFHINT_L1>((char*)node + 0);
      prefetch<PFHINT_L1>((char*)node + 64);

      /* intersect single ray with 4 bounding boxes */
      const mic_f tLowerXYZ = load16f(plower) * rdir_xyz - org_rdir_xyz;
      const mic_f tUpperXYZ = load16f(pupper) * rdir_xyz - org_rdir_xyz;
      /* 0x7777 selects the x/y/z lanes of each group of four */
      const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
      const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

      /* early pop of the next stack entry so it can be prefetched while
       * the box test finishes; undone by the sindex++ below on a hit */
      sindex--;
      curNode = stack_node[sindex];
      const Node* __restrict__ const next = curNode.node(nodes);
      prefetch<PFHINT_L2>((char*)next + 0);
      prefetch<PFHINT_L2>((char*)next + 64);

      const mic_f tNear = vreduce_max4(tLower);
      const mic_f tFar  = vreduce_min4(tUpper);
      /* 0x8888: one result lane (the 4th) per group of four */
      const mic_m hitm = le(0x8888,tNear,tFar);
      const mic_f tNear_pos = select(hitm,tNear,inf);

      /* if no child is hit, continue with early popped child */
      if (unlikely(none(hitm))) continue;
      sindex++;

      const unsigned long hiti = toInt(hitm);
      const unsigned long pos_first = bitscan64(hiti);
      const unsigned long num_hitm = countbits(hiti);

      /* if a single child is hit, continue with that child */
      /* child references are read from the lower-bounds memory,
       * reinterpreted as unsigned int and indexed by lane */
      curNode = ((unsigned int *)plower)[pos_first];
      if (likely(num_hitm == 1)) continue;

      /* if two children are hit, push in correct order */
      const unsigned long pos_second = bitscan64(pos_first,hiti);
      if (likely(num_hitm == 2))
      {
        /* distances are compared via their raw unsigned bit patterns
         * (NOTE(review): matches float ordering only for non-negative
         * values -- presumably guaranteed here; confirm) */
        const unsigned int dist_first  = ((unsigned int*)&tNear)[pos_first];
        const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
        /* node_first is not read again below */
        const unsigned int node_first  = curNode;
        const unsigned int node_second = ((unsigned int*)plower)[pos_second];

        if (dist_first <= dist_second)
        {
          /* keep the first child as current, push the second */
          stack_node[sindex] = node_second;
          sindex++;
          assert(sindex < 3*BVH4i::maxDepth+1);
          continue;
        }
        else
        {
          /* push the first child, continue with the second */
          stack_node[sindex] = curNode;
          curNode = node_second;
          sindex++;
          assert(sindex < 3*BVH4i::maxDepth+1);
          continue;
        }
      }

      /* continue with closest child and push all others */
      const mic_f min_dist = set_min_lanes(tNear_pos);
      const unsigned old_sindex = sindex;
      /* reserve one stack slot per hit child other than the closest */
      sindex += countbits(hiti) - 1;
      assert(sindex < 3*BVH4i::maxDepth+1);

      const mic_m closest_child = eq(hitm,min_dist,tNear);
      const unsigned long closest_child_pos = bitscan64(closest_child);
      /* presumably: every hit lane except the first closest one --
       * TODO confirm andn operand order */
      const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
      const mic_i plower_node = load16i((int*)plower);
      curNode = ((unsigned int*)plower)[closest_child_pos];
      /* store the lanes selected by m_pos starting at the old stack top */
      compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
    }

    /* return if stack is empty */
    if (unlikely(curNode == BVH4i::invalidNode)) break;

    /* intersect one ray against four triangles */

    //////////////////////////////////////////////////////////////////////////////////////////////////

    const Triangle1* tptr = (Triangle1*) curNode.leaf(accel);

    prefetch<PFHINT_L1>(tptr + 3);
    prefetch<PFHINT_L1>(tptr + 2);
    prefetch<PFHINT_L1>(tptr + 1);
    prefetch<PFHINT_L1>(tptr + 0);

    /* gather the three vertices of tptr[0..3] into one vector each */
    const mic_i and_mask = broadcast4to16i(zlc4);

    const mic_f v0 = gather_4f_zlc(and_mask,
                                   (float*)&tptr[0].v0,
                                   (float*)&tptr[1].v0,
                                   (float*)&tptr[2].v0,
                                   (float*)&tptr[3].v0);

    const mic_f v1 = gather_4f_zlc(and_mask,
                                   (float*)&tptr[0].v1,
                                   (float*)&tptr[1].v1,
                                   (float*)&tptr[2].v1,
                                   (float*)&tptr[3].v1);

    const mic_f v2 = gather_4f_zlc(and_mask,
                                   (float*)&tptr[0].v2,
                                   (float*)&tptr[1].v2,
                                   (float*)&tptr[2].v2,
                                   (float*)&tptr[3].v2);

    /* edge vectors and their cross product */
    const mic_f e1 = v1 - v0;
    const mic_f e2 = v0 - v2;
    const mic_f normal = lcross_zxy(e1,e2);
    const mic_f org = v0 - org_xyz;
    const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
    const mic_f den = ldot3_zxy(dir_xyz,normal);
    const mic_f rcp_den = rcp(den);
    const mic_f uu = ldot3_zxy(e2,odzxy);
    const mic_f vv = ldot3_zxy(e1,odzxy);
    const mic_f u = uu * rcp_den;
    const mic_f v = vv * rcp_den;

    /* initial lane mask: one lane per triangle (0x1111); with backface
     * culling only lanes whose den is greater than zero are kept */
#if defined(__BACKFACE_CULLING__)
    const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
    const mic_m m_init = 0x1111;
#endif

    /* chained masked compares (assuming the first argument is the input
     * mask): u >= 0, then v >= 0, then u+v <= 1 */
    const mic_m valid_u = ge(m_init,u,zero);
    const mic_m valid_v = ge(valid_u,v,zero);
    const mic_m m_aperture = le(valid_v,u+v,mic_f::one());

    const mic_f nom = ldot3_zxy(org,normal);
    const mic_f t = rcp_den*nom;

    /* no lane survived: go back and pop the next stack entry */
    if (unlikely(none(m_aperture))) continue;

    /* keep lanes with min_dist < t < max_dist */
    mic_m m_final  = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
    /* drop lanes whose triangle mask shares no bit with ray.mask; the
     * triangle mask is taken from the D element of the gathered v2 data */
    const mic_i rayMask(ray.mask);
    const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].v2,&tptr[1].v2,&tptr[2].v2,&tptr[3].v2));
    const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
    m_final &= m_ray_mask;
#endif

#if defined(__INTERSECTION_FILTER__)

    /* did the ray hit one of the four triangles? */
    /* Process remaining candidates one at a time, smallest t first.  The
     * loop is left with m_final still non-empty when the geometry has no
     * occlusion filter or the filter returns true; otherwise the
     * candidate's bit is cleared and the next one is tried. */
    while (any(m_final))
    {
      const mic_f temp_t  = select(m_final,t,max_dist_xyz);
      const mic_f min_dist = vreduce_min(temp_t);
      const mic_m m_dist = eq(min_dist,temp_t);
      const size_t vecIndex = bitscan(toInt(m_dist));
      /* four lanes per triangle, so lane index / 4 is the triangle index */
      const size_t triIndex = vecIndex >> 2;
      const Triangle1  *__restrict__ tri_ptr = tptr + triIndex;
      /* isolates the lowest set bit of m_dist */
      const mic_m m_tri = m_dist^(m_dist & (mic_m)((unsigned int)m_dist - 1));
      const mic_f gnormalx = mic_f(tri_ptr->Ng.x);
      const mic_f gnormaly = mic_f(tri_ptr->Ng.y);
      const mic_f gnormalz = mic_f(tri_ptr->Ng.z);
      const int geomID = tri_ptr->geomID();
      const int primID = tri_ptr->primID();
      Geometry* geom = ((Scene*)bvh->geometry)->get(geomID);

      if (likely(!geom->hasOcclusionFilter1())) break;

      if (runOcclusionFilter1(geom,ray,u,v,min_dist,gnormalx,gnormaly,gnormalz,m_tri,geomID,primID))
        break;

      m_final ^= m_tri; /* clear bit */
    }
#endif

    /* any surviving lane means the ray is occluded */
    if (unlikely(any(m_final)))
    {
      ray.geomID = 0;
      return;
    }
    //////////////////////////////////////////////////////////////////////////////////////////////////
  }
}
static struct pipe_sampler_view * nv30_sampler_view_create(struct pipe_context *pipe, struct pipe_resource *pt, const struct pipe_sampler_view *tmpl) { const struct nv30_texfmt *fmt = nv30_texfmt(pipe->screen, tmpl->format); struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d; struct nv30_miptree *mt = nv30_miptree(pt); struct nv30_sampler_view *so; so = MALLOC_STRUCT(nv30_sampler_view); if (!so) return NULL; so->pipe = *tmpl; so->pipe.reference.count = 1; so->pipe.texture = NULL; so->pipe.context = pipe; pipe_resource_reference(&so->pipe.texture, pt); so->fmt = NV30_3D_TEX_FORMAT_NO_BORDER; switch (pt->target) { case PIPE_TEXTURE_1D: so->fmt |= NV30_3D_TEX_FORMAT_DIMS_1D; break; case PIPE_TEXTURE_CUBE: so->fmt |= NV30_3D_TEX_FORMAT_CUBIC; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: so->fmt |= NV30_3D_TEX_FORMAT_DIMS_2D; break; case PIPE_TEXTURE_3D: so->fmt |= NV30_3D_TEX_FORMAT_DIMS_3D; break; default: assert(0); so->fmt |= NV30_3D_TEX_FORMAT_DIMS_1D; break; } so->filt = fmt->filter; so->wrap = fmt->wrap; so->swz = fmt->swizzle; so->swz |= swizzle(fmt, 3, tmpl->swizzle_a); so->swz |= swizzle(fmt, 0, tmpl->swizzle_r) << 2; so->swz |= swizzle(fmt, 1, tmpl->swizzle_g) << 4; so->swz |= swizzle(fmt, 2, tmpl->swizzle_b) << 6; /* apparently, we need to ignore the t coordinate for 1D textures to * fix piglit tex1d-2dborder */ so->wrap_mask = ~0; if (pt->target == PIPE_TEXTURE_1D) { so->wrap_mask &= ~NV30_3D_TEX_WRAP_T__MASK; so->wrap |= NV30_3D_TEX_WRAP_T_REPEAT; } /* yet more hardware suckage, can't filter 32-bit float formats */ switch (tmpl->format) { case PIPE_FORMAT_R32_FLOAT: case PIPE_FORMAT_R32G32B32A32_FLOAT: so->filt_mask = ~(NV30_3D_TEX_FILTER_MIN__MASK | NV30_3D_TEX_FILTER_MAG__MASK); so->filt |= NV30_3D_TEX_FILTER_MIN_NEAREST | NV30_3D_TEX_FILTER_MAG_NEAREST; break; default: so->filt_mask = ~0; break; } so->npot_size0 = (pt->width0 << 16) | pt->height0; if (eng3d->oclass >= NV40_3D_CLASS) { so->npot_size1 = (pt->depth0 << 20) | 
mt->uniform_pitch; if (!mt->swizzled) so->fmt |= NV40_3D_TEX_FORMAT_LINEAR; so->fmt |= 0x00008000; so->fmt |= (pt->last_level + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT; } else { so->swz |= mt->uniform_pitch << NV30_3D_TEX_SWIZZLE_RECT_PITCH__SHIFT; if (pt->last_level) so->fmt |= NV30_3D_TEX_FORMAT_MIPMAP; so->fmt |= util_logbase2(pt->width0) << 20; so->fmt |= util_logbase2(pt->height0) << 24; so->fmt |= util_logbase2(pt->depth0) << 28; so->fmt |= 0x00010000; } so->base_lod = so->pipe.u.tex.first_level << 8; so->high_lod = MIN2(pt->last_level, so->pipe.u.tex.last_level) << 8; return &so->pipe; }
/** Swizzle \p a with the SWIZZLE_XYZW pattern and a count of 4. */
ir_swizzle *
swizzle_xyzw(operand a)
{
   ir_swizzle *const result = swizzle(a, SWIZZLE_XYZW, 4);

   return result;
}