unsigned ac_count_scratch_private_memory(LLVMValueRef function) { unsigned private_mem_vgprs = 0; /* Process all LLVM instructions. */ LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function); while (bb) { LLVMValueRef next = LLVMGetFirstInstruction(bb); while (next) { LLVMValueRef inst = next; next = LLVMGetNextInstruction(next); if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) continue; LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); /* No idea why LLVM aligns allocas to 4 elements. */ unsigned alignment = LLVMGetAlignment(inst); unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); private_mem_vgprs += dw_size; } bb = LLVMGetNextBasicBlock(bb); } return private_mem_vgprs; }
void ac_optimize_vs_outputs(struct ac_llvm_context *ctx, LLVMValueRef main_fn, uint8_t *vs_output_param_offset, uint32_t num_outputs, uint8_t *num_param_exports) { LLVMBasicBlockRef bb; bool removed_any = false; struct ac_vs_exports exports; exports.num = 0; /* Process all LLVM instructions. */ bb = LLVMGetFirstBasicBlock(main_fn); while (bb) { LLVMValueRef inst = LLVMGetFirstInstruction(bb); while (inst) { LLVMValueRef cur = inst; inst = LLVMGetNextInstruction(inst); struct ac_vs_exp_inst exp; if (LLVMGetInstructionOpcode(cur) != LLVMCall) continue; LLVMValueRef callee = ac_llvm_get_called_value(cur); if (!ac_llvm_is_function(callee)) continue; const char *name = LLVMGetValueName(callee); unsigned num_args = LLVMCountParams(callee); /* Check if this is an export instruction. */ if ((num_args != 9 && num_args != 8) || (strcmp(name, "llvm.SI.export") && strcmp(name, "llvm.amdgcn.exp.f32"))) continue; LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET); unsigned target = LLVMConstIntGetZExtValue(arg); if (target < V_008DFC_SQ_EXP_PARAM) continue; target -= V_008DFC_SQ_EXP_PARAM; /* Parse the instruction. */ memset(&exp, 0, sizeof(exp)); exp.offset = target; exp.inst = cur; for (unsigned i = 0; i < 4; i++) { LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i); exp.chan[i].value = v; if (LLVMIsUndef(v)) { exp.chan[i].type = AC_IR_UNDEF; } else if (LLVMIsAConstantFP(v)) { LLVMBool loses_info; exp.chan[i].type = AC_IR_CONST; exp.chan[i].const_float = LLVMConstRealGetDouble(v, &loses_info); } else { exp.chan[i].type = AC_IR_VALUE; } } /* Eliminate constant and duplicated PARAM exports. */ if (ac_eliminate_const_output(vs_output_param_offset, num_outputs, &exp) || ac_eliminate_duplicated_output(vs_output_param_offset, num_outputs, &exports, &exp)) { removed_any = true; } else { exports.exp[exports.num++] = exp; } } bb = LLVMGetNextBasicBlock(bb); } /* Remove holes in export memory due to removed PARAM exports. * This is done by renumbering all PARAM exports. */ if (removed_any) { uint8_t old_offset[VARYING_SLOT_MAX]; unsigned out, i; /* Make a copy of the offsets. We need the old version while * we are modifying some of them. */ memcpy(old_offset, vs_output_param_offset, sizeof(old_offset)); for (i = 0; i < exports.num; i++) { unsigned offset = exports.exp[i].offset; /* Update vs_output_param_offset. Multiple outputs can * have the same offset. */ for (out = 0; out < num_outputs; out++) { if (old_offset[out] == offset) vs_output_param_offset[out] = i; } /* Change the PARAM offset in the instruction. */ LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET, LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_PARAM + i, 0)); } *num_param_exports = exports.num; } }