GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
	const struct radeon_pair_handler* handler, void *userdata)
{
	struct pair_state s;

	_mesa_bzero(&s, sizeof(s));
	s.Ctx = ctx;
	s.Program = program;
	s.Handler = handler;
	s.UserData = userdata;
	s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
	s.Verbose = GL_FALSE && s.Debug;

	s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
		sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
	s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4);
	s.ReaderPool = (struct reg_value_reader*)_mesa_calloc(
		sizeof(struct reg_value_reader)*s.Program->NumInstructions*12);

	if (s.Debug)
		_mesa_printf("Emit paired program\n");

	scan_instructions(&s);
	allocate_input_registers(&s);

	while(!s.Error &&
	      (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
		if (s.ReadyTEX)
			emit_all_tex(&s);

		while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
			emit_alu(&s);
	}

	if (s.Debug)
		_mesa_printf(" END\n");

	_mesa_free(s.Instructions);
	_mesa_free(s.ValuePool);
	_mesa_free(s.ReaderPool);

	return !s.Error;
}
/**
 * Final compilation step: Turn the intermediate radeon_program into
 * machine-readable instructions.
 */
void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
{
	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
	struct r300_emit_state emit;
	struct r300_fragment_program_code *code = &compiler->code->code.r300;
	unsigned int tex_end;

	memset(&emit, 0, sizeof(emit));
	emit.compiler = compiler;

	memset(code, 0, sizeof(struct r300_fragment_program_code));

	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
	    inst = inst->Next) {
		if (inst->Type == RC_INSTRUCTION_NORMAL) {
			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
				begin_tex(&emit);
				continue;
			}

			emit_tex(&emit, inst);
		} else {
			emit_alu(&emit, &inst->U.P);
		}
	}

	if (code->pixsize >= compiler->Base.max_temp_regs)
		rc_error(&compiler->Base, "Too many hardware temporaries used.\n");

	if (compiler->Base.Error)
		return;

	/* Finish the program */
	finish_node(&emit);

	code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */

	/* Set r400 extended instruction fields.  These values will be ignored
	 * on r300 cards. */
	code->r400_code_offset_ext |=
		(get_msbs_alu(0)
				<< R400_ALU_OFFSET_MSB_SHIFT)
		| (get_msbs_alu(code->alu.length - 1)
				<< R400_ALU_SIZE_MSB_SHIFT);

	tex_end = code->tex.length ? code->tex.length - 1 : 0;
	code->code_offset =
		((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
			& R300_PFS_CNTL_ALU_OFFSET_MASK)
		| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
			& R300_PFS_CNTL_ALU_END_MASK)
		| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
			& R300_PFS_CNTL_TEX_OFFSET_MASK)
		| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
			& R300_PFS_CNTL_TEX_END_MASK)
		| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
		| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
		;

	if (emit.current_node < 3) {
		int shift = 3 - emit.current_node;
		int i;
		for(i = emit.current_node; i >= 0; --i)
			code->code_addr[shift + i] = code->code_addr[i];
		for(i = 0; i < shift; ++i)
			code->code_addr[i] = 0;
	}

	if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
	    || code->alu.length > R300_PFS_MAX_ALU_INST
	    || code->tex.length > R300_PFS_MAX_TEX_INST) {

		code->r390_mode = 1;
	}
}
/**
 * Finish the current node without advancing to the next one.
 */
static int finish_node(struct r300_emit_state * emit)
{
	struct r300_fragment_program_compiler * c = emit->compiler;
	struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
	unsigned alu_offset;
	unsigned alu_end;
	unsigned tex_offset;
	unsigned tex_end;

	unsigned int alu_offset_msbs, alu_end_msbs;

	if (code->alu.length == emit->node_first_alu) {
		/* Generate a single NOP for this node */
		struct rc_pair_instruction inst;
		memset(&inst, 0, sizeof(inst));
		if (!emit_alu(emit, &inst))
			return 0;
	}

	alu_offset = emit->node_first_alu;
	alu_end = code->alu.length - alu_offset - 1;
	tex_offset = emit->node_first_tex;
	tex_end = code->tex.length - tex_offset - 1;

	if (code->tex.length == emit->node_first_tex) {
		if (emit->current_node > 0) {
			error("Node %i has no TEX instructions", emit->current_node);
			return 0;
		}

		tex_end = 0;
	} else {
		if (emit->current_node == 0)
			code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
	}

	/* Write the config register.
	 * Note: The order in which the words for each node are written
	 * is not correct here and needs to be fixed up once we're entirely
	 * done
	 *
	 * Also note that the register specification from AMD is slightly
	 * incorrect in its description of this register. */
	code->code_addr[emit->current_node]  =
			((alu_offset << R300_ALU_START_SHIFT)
				& R300_ALU_START_MASK)
			| ((alu_end << R300_ALU_SIZE_SHIFT)
				& R300_ALU_SIZE_MASK)
			| ((tex_offset << R300_TEX_START_SHIFT)
				& R300_TEX_START_MASK)
			| ((tex_end << R300_TEX_SIZE_SHIFT)
				& R300_TEX_SIZE_MASK)
			| emit->node_flags
			| (get_msbs_tex(tex_offset, 5)
				<< R400_TEX_START_MSB_SHIFT)
			| (get_msbs_tex(tex_end, 5)
				<< R400_TEX_SIZE_MSB_SHIFT)
			;

	/* Write r400 extended instruction fields.  These will be ignored on
	 * r300 cards.  */
	alu_offset_msbs = get_msbs_alu(alu_offset);
	alu_end_msbs = get_msbs_alu(alu_end);
	switch(emit->current_node) {
	case 0:
		code->r400_code_offset_ext |=
			alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
			| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
		break;
	case 1:
		code->r400_code_offset_ext |=
			alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
			| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
		break;
	case 2:
		code->r400_code_offset_ext |=
			alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
			| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
		break;
	case 3:
		code->r400_code_offset_ext |=
			alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
			| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
		break;
	}
	return 1;
}