示例#1
0
template<typename VectorType> void lpNorm(const VectorType& v)
{
  VectorType u = VectorType::Random(v.size());

  VERIFY_IS_APPROX(u.template lpNorm<Infinity>(), u.cwise().abs().maxCoeff());
  VERIFY_IS_APPROX(u.template lpNorm<1>(), u.cwise().abs().sum());
  VERIFY_IS_APPROX(u.template lpNorm<2>(), ei_sqrt(u.cwise().abs().cwise().square().sum()));
  VERIFY_IS_APPROX(ei_pow(u.template lpNorm<5>(), typename VectorType::RealScalar(5)), u.cwise().abs().cwise().pow(5).sum());
}
示例#2
0
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
	struct rc_instruction *rci;

	compiler->code->pos_end = 0;	/* Not supported yet */
	compiler->code->length = 0;

	compiler->SetHwInputOutput(compiler);

	for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
		struct rc_sub_instruction *vpi = &rci->U.I;
		unsigned int *inst = compiler->code->body.d + compiler->code->length;

		/* Skip instructions writing to non-existing destination */
		if (!valid_dst(compiler->code, &vpi->DstReg))
			continue;

		if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
			rc_error(&compiler->Base, "Vertex program has too many instructions\n");
			return;
		}

		switch (vpi->Opcode) {
		case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
		case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
		case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
		case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
		case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
		case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
		case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
		case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
		case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
		case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
		case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
		default:
			rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
			return;
		}

		compiler->code->length += 4;

		if (compiler->Base.Error)
			return;
	}
}
示例#3
0
static void translate_vertex_program(struct radeon_compiler *c, void *user)
{
	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
	struct rc_instruction *rci;

	struct loop * loops = NULL;
	int current_loop_depth = 0;
	int loops_reserved = 0;

	unsigned int branch_depth = 0;

	compiler->code->pos_end = 0;	/* Not supported yet */
	compiler->code->length = 0;
	compiler->code->num_temporaries = 0;

	compiler->SetHwInputOutput(compiler);

	for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
		struct rc_sub_instruction *vpi = &rci->U.I;
		unsigned int *inst = compiler->code->body.d + compiler->code->length;
		const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);

		/* Skip instructions writing to non-existing destination */
		if (!valid_dst(compiler->code, &vpi->DstReg))
			continue;

		if (info->HasDstReg) {
			/* Neither is Saturate. */
			if (vpi->SaturateMode != RC_SATURATE_NONE) {
				rc_error(&compiler->Base, "Vertex program does not support the Saturate "
					 "modifier (yet).\n");
			}
		}

		if (compiler->code->length >= c->max_alu_insts * 4) {
			rc_error(&compiler->Base, "Vertex program has too many instructions\n");
			return;
		}

		assert(compiler->Base.is_r500 ||
		       (vpi->Opcode != RC_OPCODE_SEQ &&
			vpi->Opcode != RC_OPCODE_SNE));

		switch (vpi->Opcode) {
		case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
		case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
		case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
		case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
		case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
		case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
		case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
		case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
		case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
		case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
		case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
		case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
		case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
		case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
		case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
		case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
		case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
		case RC_OPCODE_BGNLOOP:
		{
			struct loop * l;

			if ((!compiler->Base.is_r500
				&& loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
				|| loops_reserved >= R500_VS_MAX_FC_DEPTH) {
				rc_error(&compiler->Base,
						"Loops are nested too deep.");
				return;
			}
			memory_pool_array_reserve(&compiler->Base.Pool,
					struct loop, loops, current_loop_depth,
					loops_reserved, 1);
			l = &loops[current_loop_depth++];
			memset(l , 0, sizeof(struct loop));
			l->BgnLoop = (compiler->code->length / 4);
			continue;
		}
		case RC_OPCODE_ENDLOOP:
		{
			struct loop * l;
			unsigned int act_addr;
			unsigned int last_addr;
			unsigned int ret_addr;

			assert(loops);
			l = &loops[current_loop_depth - 1];
			act_addr = l->BgnLoop - 1;
			last_addr = (compiler->code->length / 4) - 1;
			ret_addr = l->BgnLoop;

			if (loops_reserved >= R300_VS_MAX_FC_OPS) {
				rc_error(&compiler->Base,
					"Too many flow control instructions.");
				return;
			}
			if (compiler->Base.is_r500) {
				compiler->code->fc_op_addrs.r500
					[compiler->code->num_fc_ops].lw =
					R500_PVS_FC_ACT_ADRS(act_addr)
					| R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
					;
				compiler->code->fc_op_addrs.r500
					[compiler->code->num_fc_ops].uw =
					R500_PVS_FC_LAST_INST(last_addr)
					| R500_PVS_FC_RTN_INST(ret_addr)
					;
			} else {
				compiler->code->fc_op_addrs.r300
					[compiler->code->num_fc_ops] =
					R300_PVS_FC_ACT_ADRS(act_addr)
					| R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
					| R300_PVS_FC_LAST_INST(last_addr)
					| R300_PVS_FC_RTN_INST(ret_addr)
					;
			}
			compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
				R300_PVS_FC_LOOP_INIT_VAL(0x0)
				| R300_PVS_FC_LOOP_STEP_VAL(0x1)
				;
			compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
						compiler->code->num_fc_ops);
			compiler->code->num_fc_ops++;
			current_loop_depth--;
			continue;
		}

		default:
			rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
			return;
		}

		/* Non-flow control instructions that are inside an if statement
		 * need to pay attention to the predicate bit. */
		if (branch_depth
			&& vpi->Opcode != RC_OPCODE_IF
			&& vpi->Opcode != RC_OPCODE_ELSE
			&& vpi->Opcode != RC_OPCODE_ENDIF) {

			inst[0] |= (PVS_DST_PRED_ENABLE_MASK
						<< PVS_DST_PRED_ENABLE_SHIFT);
			inst[0] |= (PVS_DST_PRED_SENSE_MASK
						<< PVS_DST_PRED_SENSE_SHIFT);
		}

		/* Update the number of temporaries. */
		if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
		    vpi->DstReg.Index >= compiler->code->num_temporaries)
			compiler->code->num_temporaries = vpi->DstReg.Index + 1;

		for (unsigned i = 0; i < info->NumSrcRegs; i++)
			if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
			    vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
				compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;

		if (compiler->PredicateMask)
			if (compiler->PredicateIndex >= compiler->code->num_temporaries)
				compiler->code->num_temporaries = compiler->PredicateIndex + 1;

		if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
			rc_error(&compiler->Base, "Too many temporaries.\n");
			return;
		}

		compiler->code->length += 4;

		if (compiler->Base.Error)
			return;
	}
}
示例#4
0
static void translate_vertex_program(struct radeon_compiler *c, void *user)
{
	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
	struct rc_instruction *rci;

	unsigned loops[R500_PVS_MAX_LOOP_DEPTH];
	unsigned loop_depth = 0;

	compiler->code->pos_end = 0;	/* Not supported yet */
	compiler->code->length = 0;
	compiler->code->num_temporaries = 0;

	compiler->SetHwInputOutput(compiler);

	for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
		struct rc_sub_instruction *vpi = &rci->U.I;
		unsigned int *inst = compiler->code->body.d + compiler->code->length;
		const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);

		/* Skip instructions writing to non-existing destination */
		if (!valid_dst(compiler->code, &vpi->DstReg))
			continue;

		if (info->HasDstReg) {
			/* Neither is Saturate. */
			if (vpi->SaturateMode != RC_SATURATE_NONE && !c->is_r500) {
				rc_error(&compiler->Base, "Vertex program does not support the Saturate "
					 "modifier (yet).\n");
			}
		}

		if (compiler->code->length >= c->max_alu_insts * 4) {
			rc_error(&compiler->Base, "Vertex program has too many instructions\n");
			return;
		}

		assert(compiler->Base.is_r500 ||
		       (vpi->Opcode != RC_OPCODE_SEQ &&
			vpi->Opcode != RC_OPCODE_SNE));

		switch (vpi->Opcode) {
		case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
		case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
		case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
		case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
		case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
		case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
		case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
		case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
		case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
		case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
		case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
		case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
		case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
		case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
		case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
		case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
		case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
		case RC_OPCODE_BGNLOOP:
		{
			if ((!compiler->Base.is_r500
				&& loop_depth >= R300_VS_MAX_LOOP_DEPTH)
				|| loop_depth >= R500_PVS_MAX_LOOP_DEPTH) {
				rc_error(&compiler->Base,
						"Loops are nested too deep.");
				return;
			}
			loops[loop_depth++] = ((compiler->code->length)/ 4) + 1;
			break;
		}
		case RC_OPCODE_ENDLOOP:
		{
			unsigned int act_addr;
			unsigned int last_addr;
			unsigned int ret_addr;

			ret_addr = loops[--loop_depth];
			act_addr = ret_addr - 1;
			last_addr = (compiler->code->length / 4) - 1;

			if (loop_depth >= R300_VS_MAX_FC_OPS) {
				rc_error(&compiler->Base,
					"Too many flow control instructions.");
				return;
			}
			if (compiler->Base.is_r500) {
				compiler->code->fc_op_addrs.r500
					[compiler->code->num_fc_ops].lw =
					R500_PVS_FC_ACT_ADRS(act_addr)
					| R500_PVS_FC_LOOP_CNT_JMP_INST(0x00ff)
					;
				compiler->code->fc_op_addrs.r500
					[compiler->code->num_fc_ops].uw =
					R500_PVS_FC_LAST_INST(last_addr)
					| R500_PVS_FC_RTN_INST(ret_addr)
					;
			} else {
				compiler->code->fc_op_addrs.r300
					[compiler->code->num_fc_ops] =
					R300_PVS_FC_ACT_ADRS(act_addr)
					| R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
					| R300_PVS_FC_LAST_INST(last_addr)
					| R300_PVS_FC_RTN_INST(ret_addr)
					;
			}
			compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
				R300_PVS_FC_LOOP_INIT_VAL(0x0)
				| R300_PVS_FC_LOOP_STEP_VAL(0x1)
				;
			compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
						compiler->code->num_fc_ops);
			compiler->code->num_fc_ops++;

			break;
		}

		case RC_ME_PRED_SET_CLR:
			ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst);
			break;

		case RC_ME_PRED_SET_INV:
			ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst);
			break;

		case RC_ME_PRED_SET_POP:
			ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst);
			break;

		case RC_ME_PRED_SET_RESTORE:
			ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst);
			break;

		case RC_ME_PRED_SEQ:
			ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst);
			break;

		case RC_ME_PRED_SNEQ:
			ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst);
			break;

		case RC_VE_PRED_SNEQ_PUSH:
			ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH,
								vpi, inst);
			break;

		default:
			rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
			return;
		}

		if (vpi->DstReg.Pred != RC_PRED_DISABLED) {
			inst[0] |= (PVS_DST_PRED_ENABLE_MASK
						<< PVS_DST_PRED_ENABLE_SHIFT);
			if (vpi->DstReg.Pred == RC_PRED_SET) {
				inst[0] |= (PVS_DST_PRED_SENSE_MASK
						<< PVS_DST_PRED_SENSE_SHIFT);
			}
		}

		/* Update the number of temporaries. */
		if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
		    vpi->DstReg.Index >= compiler->code->num_temporaries)
			compiler->code->num_temporaries = vpi->DstReg.Index + 1;

		for (unsigned i = 0; i < info->NumSrcRegs; i++)
			if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
			    vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
				compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;

		if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
			rc_error(&compiler->Base, "Too many temporaries.\n");
			return;
		}

		compiler->code->length += 4;

		if (compiler->Base.Error)
			return;
	}
}