Example #1
0
static void scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);

	if (!pv)
		return;

	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
	memset(newv, 0, sizeof(*newv));

	newv->Writer = s->Current;

	if (*pv) {
		(*pv)->Next = newv;
		s->Current->NumDependencies++;
	}

	*pv = newv;

	if (s->Current->NumWriteValues >= 4) {
		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
	} else {
		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
	}
}
Example #2
0
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 */
		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	reader->Reader = s->Current;
	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}
static void schedule_block(struct r300_fragment_program_compiler * c,
		struct rc_instruction * begin, struct rc_instruction * end)
{
	struct schedule_state s;
	unsigned int ip;

	memset(&s, 0, sizeof(s));
	s.C = &c->Base;

	/* Scan instructions for data dependencies */
	ip = 0;
	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
		s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
		memset(s.Current, 0, sizeof(struct schedule_instruction));

		s.Current->Instruction = inst;
		inst->IP = ip++;

		DBG("%i: Scanning\n", inst->IP);

		/* The order of things here is subtle and maybe slightly
		 * counter-intuitive, to account for the case where an
		 * instruction writes to the same register as it reads
		 * from. */
		rc_for_all_writes_chan(inst, &scan_write, &s);
		rc_for_all_reads_chan(inst, &scan_read, &s);

		DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);

		if (!s.Current->NumDependencies)
			instruction_ready(&s, s.Current);

		/* Get global readers for possible RGB->Alpha conversion. */
		rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
				is_rgb_to_alpha_possible_normal,
				is_rgb_to_alpha_possible, NULL);
	}

	/* Temporarily unlink all instructions */
	begin->Prev->Next = end;
	end->Prev = begin->Prev;

	/* Schedule instructions back */
	while(!s.C->Error &&
	      (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
		if (s.ReadyTEX)
			emit_all_tex(&s, end);

		while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
			emit_one_alu(&s, end);
	}
}
Example #4
0
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
{
	struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));

	memset(inst, 0, sizeof(struct rc_instruction));

	inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
	inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
	inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;

	return inst;
}
static void grow_branches(struct emit_state * s)
{
	unsigned int newreserved = s->BranchesReserved * 2;
	struct branch_info * newbranches;

	if (!newreserved)
		newreserved = 4;

	newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info));
	memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info));

	s->Branches = newbranches;
	s->BranchesReserved = newreserved;
}
void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps)
{
	struct regalloc_state s;

	memset(&s, 0, sizeof(s));
	s.C = &c->Base;
	s.NumHwTemporaries = maxtemps;
	s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register));
	memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register));

	compute_live_intervals(&s);

	c->AllocateHwInputs(c, &alloc_input, &s);

	do_regalloc(&s);
}
/**
 * This function renames registers in an attempt to get the code close to
 * SSA form.  After this function has completed, most of the register are only
 * written to one time, with a few exceptions.
 *
 * This function assumes all the instructions are still of type
 * RC_INSTRUCTION_NORMAL.
 */
void rc_rename_regs(struct radeon_compiler *c, void *user)
{
	unsigned int i, used_length;
	int new_index;
	struct rc_instruction * inst;
	struct rc_reader_data reader_data;
	unsigned char * used;

	/* XXX Remove this once the register allocation works with flow control. */
	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {
		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
			return;
	}

	used_length = 2 * rc_recompute_ips(c);
	used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
	memset(used, 0, sizeof(unsigned char) * used_length);

	rc_get_used_temporaries(c, used, used_length);
	for(inst = c->Program.Instructions.Next;
					inst != &c->Program.Instructions;
					inst = inst->Next) {

		if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
			continue;

		reader_data.ExitOnAbort = 1;
		rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);

		if (reader_data.Abort || reader_data.ReaderCount == 0)
			continue;

		new_index = rc_find_free_temporary_list(c, used, used_length,
						RC_MASK_XYZW);
		if (new_index < 0) {
			rc_error(c, "Ran out of temporary registers\n");
			return;
		}

		reader_data.Writer->U.I.DstReg.Index = new_index;
		for(i = 0; i < reader_data.ReaderCount; i++) {
			reader_data.Readers[i].U.I.Src->Index = new_index;
		}
	}
}
static void compute_live_intervals(struct radeon_compiler *c,
				   struct regalloc_state *s)
{
	memset(s, 0, sizeof(*s));
	s->C = c;
	s->NumHwTemporaries = c->max_temp_regs;
	s->HwTemporary =
		memory_pool_malloc(&c->Pool,
				   s->NumHwTemporaries * sizeof(struct hardware_register));
	memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register));

	rc_recompute_ips(s->C);

	for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
	    inst != &s->C->Program.Instructions;
	    inst = inst->Next) {

		/* For all instructions inside of a loop, the ENDLOOP
		 * instruction is used as the end of the live interval. */
		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
			int loops = 1;
			struct rc_instruction * tmp;
			for(tmp = inst->Next;
					tmp != &s->C->Program.Instructions;
					tmp = tmp->Next) {
				if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
					loops++;
				} else if (tmp->U.I.Opcode
							== RC_OPCODE_ENDLOOP) {
					if(!--loops) {
						s->end_loop = tmp->IP;
						break;
					}
				}
			}
		}

		if (inst->IP == s->end_loop)
			s->end_loop = 0;

		rc_for_all_reads_mask(inst, scan_callback, s);
		rc_for_all_writes_mask(inst, scan_callback, s);
	}
}
static void add_live_intervals(struct regalloc_state * s,
		struct live_intervals ** dst, struct live_intervals * src)
{
	struct live_intervals ** dst_backup = dst;

	if (VERBOSE) {
		DBG("add_live_intervals: ");
		print_live_intervals(*dst);
		DBG(" to ");
		print_live_intervals(src);
		DBG("\n");
	}

	while(src) {
		if (*dst && (*dst)->End < src->Start) {
			dst = &(*dst)->Next;
		} else if (!*dst || (*dst)->Start > src->End) {
			struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
			li->Start = src->Start;
			li->End = src->End;
			li->Next = *dst;
			*dst = li;
			src = src->Next;
		} else {
			if (src->End > (*dst)->End)
				(*dst)->End = src->End;
			if (src->Start < (*dst)->Start)
				(*dst)->Start = src->Start;
			src = src->Next;
		}
	}

	if (VERBOSE) {
		DBG("    result: ");
		print_live_intervals(*dst_backup);
		DBG("\n");
	}
}
Example #10
0
static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
{
	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
	struct rc_instruction *inst;
	struct rc_instruction *end_loop = NULL;
	unsigned int num_orig_temps = 0;
	char hwtemps[RC_REGISTER_MAX_INDEX];
	struct temporary_allocation * ta;
	unsigned int i, j;

	memset(hwtemps, 0, sizeof(hwtemps));

	rc_recompute_ips(c);

	/* Pass 1: Count original temporaries. */
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
				if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
					num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
			}
		}

		if (opcode->HasDstReg) {
			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
				if (inst->U.I.DstReg.Index >= num_orig_temps)
					num_orig_temps = inst->U.I.DstReg.Index + 1;
			}
		}
	}

	ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
			sizeof(struct temporary_allocation) * num_orig_temps);
	memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);

	/* Pass 2: Determine original temporary lifetimes */
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
		/* Instructions inside of loops need to use the ENDLOOP
		 * instruction as their LastRead. */
		if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
			int endloops = 1;
			struct rc_instruction * ptr;
			for(ptr = inst->Next;
				ptr != &compiler->Base.Program.Instructions;
							ptr = ptr->Next){
				if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
					endloops++;
				} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
					endloops--;
					if (endloops <= 0) {
						end_loop = ptr;
						break;
					}
				}
			}
		}

		if (inst == end_loop) {
			end_loop = NULL;
			continue;
		}

		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
				ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst;
			}
		}
	}

	/* Pass 3: Register allocation */
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
				unsigned int orig = inst->U.I.SrcReg[i].Index;
				inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;

				if (ta[orig].Allocated && inst == ta[orig].LastRead)
					hwtemps[ta[orig].HwTemp] = 0;
			}
		}

		if (opcode->HasDstReg) {
			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
				unsigned int orig = inst->U.I.DstReg.Index;

				if (!ta[orig].Allocated) {
					for(j = 0; j < c->max_temp_regs; ++j) {
						if (!hwtemps[j])
							break;
					}
					ta[orig].Allocated = 1;
					ta[orig].HwTemp = j;
					hwtemps[ta[orig].HwTemp] = 1;
				}

				inst->U.I.DstReg.Index = ta[orig].HwTemp;
			}
		}
	}
}
Example #11
0
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
	struct rc_instruction *inst;
	unsigned int num_orig_temps = 0;
	char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
	struct temporary_allocation * ta;
	unsigned int i, j;

	compiler->code->num_temporaries = 0;
	memset(hwtemps, 0, sizeof(hwtemps));

	/* Pass 1: Count original temporaries and allocate structures */
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
				if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
					num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
			}
		}

		if (opcode->HasDstReg) {
			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
				if (inst->U.I.DstReg.Index >= num_orig_temps)
					num_orig_temps = inst->U.I.DstReg.Index + 1;
			}
		}
	}

	ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
			sizeof(struct temporary_allocation) * num_orig_temps);
	memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);

	/* Pass 2: Determine original temporary lifetimes */
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
				ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
		}
	}

	/* Pass 3: Register allocation */
	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
				unsigned int orig = inst->U.I.SrcReg[i].Index;
				inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;

				if (ta[orig].Allocated && inst == ta[orig].LastRead)
					hwtemps[ta[orig].HwTemp] = 0;
			}
		}

		if (opcode->HasDstReg) {
			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
				unsigned int orig = inst->U.I.DstReg.Index;

				if (!ta[orig].Allocated) {
					for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
						if (!hwtemps[j])
							break;
					}
					if (j >= VSF_MAX_FRAGMENT_TEMPS) {
						fprintf(stderr, "Out of hw temporaries\n");
					} else {
						ta[orig].Allocated = 1;
						ta[orig].HwTemp = j;
						hwtemps[j] = 1;

						if (j >= compiler->code->num_temporaries)
							compiler->code->num_temporaries = j + 1;
					}
				}

				inst->U.I.DstReg.Index = ta[orig].HwTemp;
			}
		}
	}
}
static void schedule_block(struct schedule_state * s,
		struct rc_instruction * begin, struct rc_instruction * end)
{
	unsigned int ip;

	/* Scan instructions for data dependencies */
	ip = 0;
	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
		s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
		memset(s->Current, 0, sizeof(struct schedule_instruction));

		if (inst->Type == RC_INSTRUCTION_NORMAL) {
			const struct rc_opcode_info * info =
					rc_get_opcode_info(inst->U.I.Opcode);
			if (info->HasTexture) {
				s->TEXCount++;
			}
		}

		/* XXX: This causes SemWait to be set for all instructions in
		 * a block if the previous block contained a TEX instruction.
		 * We can do better here, but it will take a lot of work. */
		if (s->PrevBlockHasTex) {
			s->Current->TexReadCount = 1;
		}

		s->Current->Instruction = inst;
		inst->IP = ip++;

		DBG("%i: Scanning\n", inst->IP);

		/* The order of things here is subtle and maybe slightly
		 * counter-intuitive, to account for the case where an
		 * instruction writes to the same register as it reads
		 * from. */
		rc_for_all_writes_chan(inst, &scan_write, s);
		rc_for_all_reads_chan(inst, &scan_read, s);

		DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);

		if (!s->Current->NumDependencies) {
			instruction_ready(s, s->Current);
		}

		/* Get global readers for possible RGB->Alpha conversion. */
		s->Current->GlobalReaders.ExitOnAbort = 1;
		rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
				is_rgb_to_alpha_possible_normal,
				is_rgb_to_alpha_possible, NULL);
	}

	/* Temporarily unlink all instructions */
	begin->Prev->Next = end;
	end->Prev = begin->Prev;

	/* Schedule instructions back */
	while(!s->C->Error &&
	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
		emit_instruction(s, end);
	}
}
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 * Why?
		 * Because each instruction depends on the writers of its source
		 * registers _and_ the most recent writer of its destination
		 * register.  In this case, the current instruction (s->Current)
		 * has a dependency that both writes to one of its source
		 * registers and was the most recent writer to its destination
		 * register.  We have already marked this dependency in
		 * scan_write(), so we don't need to do it again.
		 */

		/* We need to make sure we are adding s->Current to the
		 * previous writer's list of TexReaders, if the previous writer
		 * was a TEX instruction.
		 */
		add_tex_reader(s, s->PrevWriter[chan], s->Current);

		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	reader->Reader = s->Current;
	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			add_tex_reader(s, (*v)->Writer, s->Current);
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}