/**
 * Per-channel write callback for the dependency scan.
 *
 * Creates a zeroed reg_value for the written channel with the current
 * schedule instruction as its Writer.  If a value was already tracked for
 * that channel, the new value is linked as its Next and the current
 * instruction gains one dependency.  The new value is then recorded in the
 * current instruction's WriteValues (at most 4 entries; a fifth raises an
 * error through rc_error() and is not recorded).
 *
 * \param data   the schedule_state, passed as an opaque pointer
 * \param inst   not used by this callback
 * \param file   register file of the written channel
 * \param index  register index of the written channel
 * \param chan   channel being written
 */
static void scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value * fresh;
	struct reg_value ** slot = get_reg_valuep(s, file, index, chan);

	/* No tracking slot for this register: nothing to record. */
	if (slot == NULL)
		return;

	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	fresh = memory_pool_malloc(&s->C->Pool, sizeof(*fresh));
	memset(fresh, 0, sizeof(*fresh));
	fresh->Writer = s->Current;

	if (*slot != NULL) {
		/* An earlier value exists for this channel: chain the new
		 * value behind it and count that as a dependency. */
		s->Current->NumDependencies++;
		(*slot)->Next = fresh;
	}
	*slot = fresh;

	if (s->Current->NumWriteValues < 4) {
		s->Current->WriteValues[s->Current->NumWriteValues] = fresh;
		s->Current->NumWriteValues++;
	} else {
		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
	}
}
/**
 * Per-channel read callback for the dependency scan.
 *
 * Registers the current schedule instruction as a reader of the value that
 * is currently tracked for the given register channel, creating a
 * writer-less value if none is tracked yet.  The value is then recorded in
 * the current instruction's ReadValues (at most 12 entries; a thirteenth
 * raises an error through rc_error() and is not recorded).
 *
 * \param data   the schedule_state, passed as an opaque pointer
 * \param inst   not used by this callback
 * \param file   register file of the channel being read
 * \param index  register index of the channel being read
 * \param chan   channel being read
 */
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 */
		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	/* Clear the node before use: every other pool allocation in this scan
	 * is zeroed explicitly, and without this the Next pointer of the first
	 * reader of a value (the branch below that creates the value) would be
	 * left indeterminate, so the Readers list would have no valid end. */
	memset(reader, 0, sizeof(*reader));
	reader->Reader = s->Current;

	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		/* Push the new reader at the head of the existing list. */
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}
static void schedule_block(struct r300_fragment_program_compiler * c, struct rc_instruction * begin, struct rc_instruction * end) { struct schedule_state s; unsigned int ip; memset(&s, 0, sizeof(s)); s.C = &c->Base; /* Scan instructions for data dependencies */ ip = 0; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); memset(s.Current, 0, sizeof(struct schedule_instruction)); s.Current->Instruction = inst; inst->IP = ip++; DBG("%i: Scanning\n", inst->IP); /* The order of things here is subtle and maybe slightly * counter-intuitive, to account for the case where an * instruction writes to the same register as it reads * from. */ rc_for_all_writes_chan(inst, &scan_write, &s); rc_for_all_reads_chan(inst, &scan_read, &s); DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); if (!s.Current->NumDependencies) instruction_ready(&s, s.Current); /* Get global readers for possible RGB->Alpha conversion. */ rc_get_readers(s.C, inst, &s.Current->GlobalReaders, is_rgb_to_alpha_possible_normal, is_rgb_to_alpha_possible, NULL); } /* Temporarily unlink all instructions */ begin->Prev->Next = end; end->Prev = begin->Prev; /* Schedule instructions back */ while(!s.C->Error && (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { if (s.ReadyTEX) emit_all_tex(&s, end); while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) emit_one_alu(&s, end); } }
/**
 * Allocate a new instruction from the compiler's memory pool.
 *
 * The instruction is zero-filled, then given the opcode
 * RC_OPCODE_ILLEGAL_OPCODE, a destination write mask of RC_MASK_XYZW and
 * the swizzle RC_SWIZZLE_XYZW on source registers 0, 1 and 2.
 *
 * \param c  compiler whose Pool provides the memory
 * \return the newly initialised instruction
 */
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
{
	unsigned int src;
	struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(*inst));

	memset(inst, 0, sizeof(*inst));

	inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
	inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	for (src = 0; src < 3; ++src) {
		inst->U.I.SrcReg[src].Swizzle = RC_SWIZZLE_XYZW;
	}

	return inst;
}
/**
 * Enlarge the branch stack of an emit_state.
 *
 * The reserved capacity is doubled (or set to 4 when it is currently 0), a
 * new array of that size is taken from the compiler's memory pool, the
 * first CurrentBranchDepth entries are copied over, and the state is
 * switched to the new array.  The old array is not released here.
 *
 * \param s  emit state whose Branches array is grown
 */
static void grow_branches(struct emit_state * s)
{
	unsigned int newreserved = s->BranchesReserved * 2;
	struct branch_info * newbranches;

	if (!newreserved)
		newreserved = 4;

	newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info));

	/* Only copy when there is something to copy.  On the first growth the
	 * old array pointer may still be NULL, and handing a null pointer to
	 * memcpy() is undefined behaviour even when the length is zero. */
	if (s->CurrentBranchDepth) {
		memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info));
	}

	s->Branches = newbranches;
	s->BranchesReserved = newreserved;
}
void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps) { struct regalloc_state s; memset(&s, 0, sizeof(s)); s.C = &c->Base; s.NumHwTemporaries = maxtemps; s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register)); memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register)); compute_live_intervals(&s); c->AllocateHwInputs(c, &alloc_input, &s); do_regalloc(&s); }
/** * This function renames registers in an attempt to get the code close to * SSA form. After this function has completed, most of the register are only * written to one time, with a few exceptions. * * This function assumes all the instructions are still of type * RC_INSTRUCTION_NORMAL. */ void rc_rename_regs(struct radeon_compiler *c, void *user) { unsigned int i, used_length; int new_index; struct rc_instruction * inst; struct rc_reader_data reader_data; unsigned char * used; /* XXX Remove this once the register allocation works with flow control. */ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) return; } used_length = 2 * rc_recompute_ips(c); used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); memset(used, 0, sizeof(unsigned char) * used_length); rc_get_used_temporaries(c, used, used_length); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) continue; reader_data.ExitOnAbort = 1; rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); if (reader_data.Abort || reader_data.ReaderCount == 0) continue; new_index = rc_find_free_temporary_list(c, used, used_length, RC_MASK_XYZW); if (new_index < 0) { rc_error(c, "Ran out of temporary registers\n"); return; } reader_data.Writer->U.I.DstReg.Index = new_index; for(i = 0; i < reader_data.ReaderCount; i++) { reader_data.Readers[i].U.I.Src->Index = new_index; } } }
static void compute_live_intervals(struct radeon_compiler *c, struct regalloc_state *s) { memset(s, 0, sizeof(*s)); s->C = c; s->NumHwTemporaries = c->max_temp_regs; s->HwTemporary = memory_pool_malloc(&c->Pool, s->NumHwTemporaries * sizeof(struct hardware_register)); memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register)); rc_recompute_ips(s->C); for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { /* For all instructions inside of a loop, the ENDLOOP * instruction is used as the end of the live interval. */ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) { int loops = 1; struct rc_instruction * tmp; for(tmp = inst->Next; tmp != &s->C->Program.Instructions; tmp = tmp->Next) { if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) { loops++; } else if (tmp->U.I.Opcode == RC_OPCODE_ENDLOOP) { if(!--loops) { s->end_loop = tmp->IP; break; } } } } if (inst->IP == s->end_loop) s->end_loop = 0; rc_for_all_reads_mask(inst, scan_callback, s); rc_for_all_writes_mask(inst, scan_callback, s); } }
static void add_live_intervals(struct regalloc_state * s, struct live_intervals ** dst, struct live_intervals * src) { struct live_intervals ** dst_backup = dst; if (VERBOSE) { DBG("add_live_intervals: "); print_live_intervals(*dst); DBG(" to "); print_live_intervals(src); DBG("\n"); } while(src) { if (*dst && (*dst)->End < src->Start) { dst = &(*dst)->Next; } else if (!*dst || (*dst)->Start > src->End) { struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); li->Start = src->Start; li->End = src->End; li->Next = *dst; *dst = li; src = src->Next; } else { if (src->End > (*dst)->End) (*dst)->End = src->End; if (src->Start < (*dst)->Start) (*dst)->Start = src->Start; src = src->Next; } } if (VERBOSE) { DBG(" result: "); print_live_intervals(*dst_backup); DBG("\n"); } }
static void allocate_temporary_registers(struct radeon_compiler *c, void *user) { struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c; struct rc_instruction *inst; struct rc_instruction *end_loop = NULL; unsigned int num_orig_temps = 0; char hwtemps[RC_REGISTER_MAX_INDEX]; struct temporary_allocation * ta; unsigned int i, j; memset(hwtemps, 0, sizeof(hwtemps)); rc_recompute_ips(c); /* Pass 1: Count original temporaries. */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { if (inst->U.I.SrcReg[i].Index >= num_orig_temps) num_orig_temps = inst->U.I.SrcReg[i].Index + 1; } } if (opcode->HasDstReg) { if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { if (inst->U.I.DstReg.Index >= num_orig_temps) num_orig_temps = inst->U.I.DstReg.Index + 1; } } } ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, sizeof(struct temporary_allocation) * num_orig_temps); memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); /* Instructions inside of loops need to use the ENDLOOP * instruction as their LastRead. 
*/ if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { int endloops = 1; struct rc_instruction * ptr; for(ptr = inst->Next; ptr != &compiler->Base.Program.Instructions; ptr = ptr->Next){ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { endloops++; } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { endloops--; if (endloops <= 0) { end_loop = ptr; break; } } } } if (inst == end_loop) { end_loop = NULL; continue; } for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; } } } /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { unsigned int orig = inst->U.I.SrcReg[i].Index; inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; if (ta[orig].Allocated && inst == ta[orig].LastRead) hwtemps[ta[orig].HwTemp] = 0; } } if (opcode->HasDstReg) { if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { for(j = 0; j < c->max_temp_regs; ++j) { if (!hwtemps[j]) break; } ta[orig].Allocated = 1; ta[orig].HwTemp = j; hwtemps[ta[orig].HwTemp] = 1; } inst->U.I.DstReg.Index = ta[orig].HwTemp; } } } }
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; unsigned int num_orig_temps = 0; char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; struct temporary_allocation * ta; unsigned int i, j; compiler->code->num_temporaries = 0; memset(hwtemps, 0, sizeof(hwtemps)); /* Pass 1: Count original temporaries and allocate structures */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { if (inst->U.I.SrcReg[i].Index >= num_orig_temps) num_orig_temps = inst->U.I.SrcReg[i].Index + 1; } } if (opcode->HasDstReg) { if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { if (inst->U.I.DstReg.Index >= num_orig_temps) num_orig_temps = inst->U.I.DstReg.Index + 1; } } } ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, sizeof(struct temporary_allocation) * num_orig_temps); memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) ta[inst->U.I.SrcReg[i].Index].LastRead = inst; } } /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { unsigned int orig = inst->U.I.SrcReg[i].Index; inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; if (ta[orig].Allocated && inst == 
ta[orig].LastRead) hwtemps[ta[orig].HwTemp] = 0; } } if (opcode->HasDstReg) { if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { if (!hwtemps[j]) break; } if (j >= VSF_MAX_FRAGMENT_TEMPS) { fprintf(stderr, "Out of hw temporaries\n"); } else { ta[orig].Allocated = 1; ta[orig].HwTemp = j; hwtemps[j] = 1; if (j >= compiler->code->num_temporaries) compiler->code->num_temporaries = j + 1; } } inst->U.I.DstReg.Index = ta[orig].HwTemp; } } } }
static void schedule_block(struct schedule_state * s, struct rc_instruction * begin, struct rc_instruction * end) { unsigned int ip; /* Scan instructions for data dependencies */ ip = 0; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current)); memset(s->Current, 0, sizeof(struct schedule_instruction)); if (inst->Type == RC_INSTRUCTION_NORMAL) { const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); if (info->HasTexture) { s->TEXCount++; } } /* XXX: This causes SemWait to be set for all instructions in * a block if the previous block contained a TEX instruction. * We can do better here, but it will take a lot of work. */ if (s->PrevBlockHasTex) { s->Current->TexReadCount = 1; } s->Current->Instruction = inst; inst->IP = ip++; DBG("%i: Scanning\n", inst->IP); /* The order of things here is subtle and maybe slightly * counter-intuitive, to account for the case where an * instruction writes to the same register as it reads * from. */ rc_for_all_writes_chan(inst, &scan_write, s); rc_for_all_reads_chan(inst, &scan_read, s); DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies); if (!s->Current->NumDependencies) { instruction_ready(s, s->Current); } /* Get global readers for possible RGB->Alpha conversion. */ s->Current->GlobalReaders.ExitOnAbort = 1; rc_get_readers(s->C, inst, &s->Current->GlobalReaders, is_rgb_to_alpha_possible_normal, is_rgb_to_alpha_possible, NULL); } /* Temporarily unlink all instructions */ begin->Prev->Next = end; end->Prev = begin->Prev; /* Schedule instructions back */ while(!s->C->Error && (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) { emit_instruction(s, end); } }
/**
 * Per-channel read callback for the dependency scan (TEX-aware variant).
 *
 * Registers the current schedule instruction as a reader of the value that
 * is currently tracked for the given register channel, creating a
 * writer-less value if none is tracked yet, and reports the read to
 * add_tex_reader() whenever a writer is known.  The value is then recorded
 * in the current instruction's ReadValues (at most 12 entries; a thirteenth
 * raises an error through rc_error() and is not recorded).
 *
 * \param data   the schedule_state, passed as an opaque pointer
 * \param inst   not used by this callback
 * \param file   register file of the channel being read
 * \param index  register index of the channel being read
 * \param chan   channel being read
 */
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 * Why?
		 * Because each instruction depends on the writers of its source
		 * registers _and_ the most recent writer of its destination
		 * register.  In this case, the current instruction (s->Current)
		 * has a dependency that both writes to one of its source
		 * registers and was the most recent writer to its destination
		 * register.  We have already marked this dependency in
		 * scan_write(), so we don't need to do it again.
		 */

		/* We need to make sure we are adding s->Current to the
		 * previous writer's list of TexReaders, if the previous writer
		 * was a TEX instruction.
		 */
		add_tex_reader(s, s->PrevWriter[chan], s->Current);

		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	/* Clear the node before use: the reg_value allocated below is zeroed
	 * explicitly as well, and without this the Next pointer of the first
	 * reader of a value (the branch that creates the value) would be left
	 * indeterminate, so the Readers list would have no valid end. */
	memset(reader, 0, sizeof(*reader));
	reader->Reader = s->Current;

	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		/* Push the new reader at the head of the existing list. */
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			add_tex_reader(s, (*v)->Writer, s->Current);
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}