/** Given the statements implementing a function, emit the code that implements the function. Most of the work do be done here just involves wiring up the function parameter values to be available in the function body code. */ void Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, SourcePos firstStmtPos) { // Connect the __mask builtin to the location in memory that stores its // value maskSymbol->storagePtr = ctx->GetFullMaskPointer(); // add debugging info for __mask maskSymbol->pos = firstStmtPos; ctx->EmitVariableDebugInfo(maskSymbol); #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_7 // LLVM 3.7+ if (g->NoOmitFramePointer) function->addFnAttr("no-frame-pointer-elim", "true"); #endif #if 0 llvm::BasicBlock *entryBBlock = ctx->GetCurrentBasicBlock(); #endif const FunctionType *type = CastType<FunctionType>(sym->type); Assert(type != NULL); if (type->isTask == true #ifdef ISPC_NVPTX_ENABLED && (g->target->getISA() != Target::NVPTX) #endif ){ // For tasks, there should always be three parameters: the // pointer to the structure that holds all of the arguments, the // thread index, and the thread count variables. llvm::Function::arg_iterator argIter = function->arg_begin(); #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */ llvm::Value *structParamPtr = argIter++; llvm::Value *threadIndex = argIter++; llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; llvm::Value *taskIndex0 = argIter++; llvm::Value *taskIndex1 = argIter++; llvm::Value *taskIndex2 = argIter++; llvm::Value *taskCount0 = argIter++; llvm::Value *taskCount1 = argIter++; llvm::Value *taskCount2 = argIter++; #else /* LLVM 3.8+ */ llvm::Value *structParamPtr = &*(argIter++); llvm::Value *threadIndex = &*(argIter++); llvm::Value *threadCount = &*(argIter++); llvm::Value *taskIndex = &*(argIter++); llvm::Value *taskCount = &*(argIter++); llvm::Value *taskIndex0 = &*(argIter++); llvm::Value *taskIndex1 = &*(argIter++); llvm::Value *taskIndex2 = &*(argIter++); llvm::Value *taskCount0 = &*(argIter++); llvm::Value *taskCount1 = &*(argIter++); llvm::Value *taskCount2 = &*(argIter++); #endif // Copy the function parameter values from the structure into local // storage for (unsigned int i = 0; i < args.size(); ++i) lCopyInTaskParameter(i, structParamPtr, args, ctx); if (type->isUnmasked == false) { // Copy in the mask as well. int nArgs = (int)args.size(); // The mask is the last parameter in the argument structure llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL, "task_struct_mask"); llvm::Value *ptrval = ctx->LoadInst(ptr, "mask"); ctx->SetFunctionMask(ptrval); } // Copy threadIndex and threadCount into stack-allocated storage so // that their symbols point to something reasonable. threadIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadIndex"); ctx->StoreInst(threadIndex, threadIndexSym->storagePtr); threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount"); ctx->StoreInst(threadCount, threadCountSym->storagePtr); // Copy taskIndex and taskCount into stack-allocated storage so // that their symbols point to something reasonable. taskIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex"); ctx->StoreInst(taskIndex, taskIndexSym->storagePtr); taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0"); ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr); taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1"); ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr); taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2"); ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr); taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0"); ctx->StoreInst(taskCount0, taskCountSym0->storagePtr); taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1"); ctx->StoreInst(taskCount1, taskCountSym1->storagePtr); taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2"); ctx->StoreInst(taskCount2, taskCountSym2->storagePtr); } else { // Regular, non-task function llvm::Function::arg_iterator argIter = function->arg_begin(); for (unsigned int i = 0; i < args.size(); ++i, ++argIter) { Symbol *sym = args[i]; if (sym == NULL) // anonymous function parameter continue; argIter->setName(sym->name.c_str()); // Allocate stack storage for the parameter and emit code // to store the its value there. sym->storagePtr = ctx->AllocaInst(argIter->getType(), sym->name.c_str()); #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */ ctx->StoreInst(argIter, sym->storagePtr); #else /* LLVM 3.8+ */ ctx->StoreInst(&*argIter, sym->storagePtr); #endif ctx->EmitFunctionParameterDebugInfo(sym, i); } // If the number of actual function arguments is equal to the // number of declared arguments in decl->functionParams, then we // don't have a mask parameter, so set it to be all on. This // happens for exmaple with 'export'ed functions that the app // calls. if (argIter == function->arg_end()) { Assert(type->isUnmasked || type->isExported); ctx->SetFunctionMask(LLVMMaskAllOn); } else { Assert(type->isUnmasked == false); // Otherwise use the mask to set the entry mask value argIter->setName("__mask"); Assert(argIter->getType() == LLVMTypes::MaskType); #if ISPC_LLVM_VERSION <= ISPC_LLVM_3_7 /* 3.2, 3.3, 3.4, 3.5, 3.6, 3.7 */ ctx->SetFunctionMask(argIter); #else /* LLVM 3.8+ */ ctx->SetFunctionMask(&*argIter); #endif Assert(++argIter == function->arg_end()); } #ifdef ISPC_NVPTX_ENABLED if (type->isTask == true && g->target->getISA() == Target::NVPTX) { llvm::NamedMDNode* annotations = m->module->getOrInsertNamedMetadata("nvvm.annotations"); #if ISPC_LLVM_VERSION >= ISPC_LLVM_3_6 // LLVM 3.6+ llvm::SmallVector<llvm::Metadata*, 3> av; av.push_back(llvm::ValueAsMetadata::get(function)); av.push_back(llvm::MDString::get(*g->ctx, "kernel")); av.push_back(llvm::ConstantAsMetadata::get(LLVMInt32(1))); annotations->addOperand(llvm::MDNode::get(*g->ctx, llvm::ArrayRef<llvm::Metadata*>(av))); #else llvm::SmallVector<llvm::Value*, 3> av; av.push_back(function); av.push_back(llvm::MDString::get(*g->ctx, "kernel")); av.push_back(LLVMInt32(1)); annotations->addOperand(llvm::MDNode::get(*g->ctx, av)); #endif } #endif /* ISPC_NVPTX_ENABLED */ } // Finally, we can generate code for the function if (code != NULL) { ctx->SetDebugPos(code->pos); ctx->AddInstrumentationPoint("function entry"); int costEstimate = EstimateCost(code); Debug(code->pos, "Estimated cost for function \"%s\" = %d\n", sym->name.c_str(), costEstimate); // If the body of the function is non-trivial, then we wrap the // entire thing inside code that tests to see if the mask is all // on, all off, or mixed. If this is a simple function, then this // isn't worth the code bloat / overhead. bool checkMask = (type->isTask == true) || ( #if ISPC_LLVM_VERSION == ISPC_LLVM_3_2 // 3.2 (function->getFnAttributes().hasAttribute(llvm::Attributes::AlwaysInline) == false) #else // LLVM 3.3+ (function->getAttributes().getFnAttributes().hasAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::AlwaysInline) == false) #endif && costEstimate > CHECK_MASK_AT_FUNCTION_START_COST); checkMask &= (type->isUnmasked == false); checkMask &= (g->target->getMaskingIsFree() == false); checkMask &= (g->opt.disableCoherentControlFlow == false); if (checkMask) { llvm::Value *mask = ctx->GetFunctionMask(); llvm::Value *allOn = ctx->All(mask); llvm::BasicBlock *bbAllOn = ctx->CreateBasicBlock("all_on"); llvm::BasicBlock *bbSomeOn = ctx->CreateBasicBlock("some_on"); // Set up basic blocks for goto targets ctx->InitializeLabelMap(code); ctx->BranchInst(bbAllOn, bbSomeOn, allOn); // all on: we've determined dynamically that the mask is all // on. Set the current mask to "all on" explicitly so that // codegen for this path can be improved with this knowledge in // hand... ctx->SetCurrentBasicBlock(bbAllOn); if (!g->opt.disableMaskAllOnOptimizations) ctx->SetFunctionMask(LLVMMaskAllOn); code->EmitCode(ctx); if (ctx->GetCurrentBasicBlock()) ctx->ReturnInst(); // not all on: however, at least one lane must be running, // since we should never run with all off... some on: reset // the mask to the value it had at function entry and emit the // code. Resetting the mask here is important, due to the "all // on" setting of it for the path above. ctx->SetCurrentBasicBlock(bbSomeOn); ctx->SetFunctionMask(mask); // Set up basic blocks for goto targets again; we want to have // one set of them for gotos in the 'all on' case, and a // distinct set for the 'mixed mask' case. ctx->InitializeLabelMap(code); code->EmitCode(ctx); if (ctx->GetCurrentBasicBlock()) ctx->ReturnInst(); } else { // Set up basic blocks for goto targets ctx->InitializeLabelMap(code); // No check, just emit the code code->EmitCode(ctx); } } if (ctx->GetCurrentBasicBlock()) { // FIXME: We'd like to issue a warning if we've reached the end of // the function without a return statement (for non-void // functions). But the test below isn't right, since we can have // (with 'x' a varying test) "if (x) return a; else return b;", in // which case we have a valid basic block but its unreachable so ok // to not have return statement. #if 0 // If the bblock has no predecessors, then it doesn't matter if it // doesn't have a return; it'll never be reached. If it does, // issue a warning. Also need to warn if it's the entry block for // the function (in which case it will not have predeccesors but is // still reachable.) if (type->GetReturnType()->IsVoidType() == false && (pred_begin(ec.bblock) != pred_end(ec.bblock) || (ec.bblock == entryBBlock))) Warning(sym->pos, "Missing return statement in function returning \"%s\".", type->rType->GetString().c_str()); #endif // FIXME: would like to set the context's current position to // e.g. the end of the function code // if bblock is non-NULL, it hasn't been terminated by e.g. a // return instruction. Need to add a return instruction. ctx->ReturnInst(); } }
/** Given the statements implementing a function, emit the code that implements the function. Most of the work do be done here just involves wiring up the function parameter values to be available in the function body code. */ void Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, SourcePos firstStmtPos) { // Connect the __mask builtin to the location in memory that stores its // value maskSymbol->storagePtr = ctx->GetFullMaskPointer(); // add debugging info for __mask, programIndex, ... maskSymbol->pos = firstStmtPos; ctx->EmitVariableDebugInfo(maskSymbol); #if 0 llvm::BasicBlock *entryBBlock = ctx->GetCurrentBasicBlock(); #endif const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type); Assert(type != NULL); if (type->isTask == true) { // For tasks, we there should always be three parmeters: the // pointer to the structure that holds all of the arguments, the // thread index, and the thread count variables. llvm::Function::arg_iterator argIter = function->arg_begin(); llvm::Value *structParamPtr = argIter++; llvm::Value *threadIndex = argIter++; llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; // Copy the function parameter values from the structure into local // storage for (unsigned int i = 0; i < args.size(); ++i) lCopyInTaskParameter(i, structParamPtr, args, ctx); // Copy in the mask as well. int nArgs = (int)args.size(); // The mask is the last parameter in the argument structure llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL, "task_struct_mask"); llvm::Value *ptrval = ctx->LoadInst(ptr, "mask"); ctx->SetFunctionMask(ptrval); // Copy threadIndex and threadCount into stack-allocated storage so // that their symbols point to something reasonable. threadIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadIndex"); ctx->StoreInst(threadIndex, threadIndexSym->storagePtr); threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount"); ctx->StoreInst(threadCount, threadCountSym->storagePtr); // Copy taskIndex and taskCount into stack-allocated storage so // that their symbols point to something reasonable. taskIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex"); ctx->StoreInst(taskIndex, taskIndexSym->storagePtr); taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); } else { // Regular, non-task function llvm::Function::arg_iterator argIter = function->arg_begin(); for (unsigned int i = 0; i < args.size(); ++i, ++argIter) { Symbol *sym = args[i]; if (sym == NULL) // anonymous function parameter continue; argIter->setName(sym->name.c_str()); // Allocate stack storage for the parameter and emit code // to store the its value there. sym->storagePtr = ctx->AllocaInst(argIter->getType(), sym->name.c_str()); ctx->StoreInst(argIter, sym->storagePtr); ctx->EmitFunctionParameterDebugInfo(sym); } // If the number of actual function arguments is equal to the // number of declared arguments in decl->functionParams, then we // don't have a mask parameter, so set it to be all on. This // happens for exmaple with 'export'ed functions that the app // calls. if (argIter == function->arg_end()) ctx->SetFunctionMask(LLVMMaskAllOn); else { // Otherwise use the mask to set the entry mask value argIter->setName("__mask"); Assert(argIter->getType() == LLVMTypes::MaskType); ctx->SetFunctionMask(argIter); Assert(++argIter == function->arg_end()); } } // Finally, we can generate code for the function if (code != NULL) { ctx->SetDebugPos(code->pos); ctx->AddInstrumentationPoint("function entry"); int costEstimate = EstimateCost(code); Debug(code->pos, "Estimated cost for function \"%s\" = %d\n", sym->name.c_str(), costEstimate); // If the body of the function is non-trivial, then we wrap the // entire thing inside code that tests to see if the mask is all // on, all off, or mixed. If this is a simple function, then this // isn't worth the code bloat / overhead. bool checkMask = (type->isTask == true) || ((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) && costEstimate > CHECK_MASK_AT_FUNCTION_START_COST); checkMask &= (g->target.maskingIsFree == false); checkMask &= (g->opt.disableCoherentControlFlow == false); if (checkMask) { llvm::Value *mask = ctx->GetFunctionMask(); llvm::Value *allOn = ctx->All(mask); llvm::BasicBlock *bbAllOn = ctx->CreateBasicBlock("all_on"); llvm::BasicBlock *bbNotAll = ctx->CreateBasicBlock("not_all_on"); // Set up basic blocks for goto targets ctx->InitializeLabelMap(code); ctx->BranchInst(bbAllOn, bbNotAll, allOn); // all on: we've determined dynamically that the mask is all // on. Set the current mask to "all on" explicitly so that // codegen for this path can be improved with this knowledge in // hand... ctx->SetCurrentBasicBlock(bbAllOn); if (!g->opt.disableMaskAllOnOptimizations) ctx->SetFunctionMask(LLVMMaskAllOn); code->EmitCode(ctx); if (ctx->GetCurrentBasicBlock()) ctx->ReturnInst(); // not all on: figure out if no instances are running, or if // some of them are ctx->SetCurrentBasicBlock(bbNotAll); ctx->SetFunctionMask(mask); llvm::BasicBlock *bbNoneOn = ctx->CreateBasicBlock("none_on"); llvm::BasicBlock *bbSomeOn = ctx->CreateBasicBlock("some_on"); llvm::Value *anyOn = ctx->Any(mask); ctx->BranchInst(bbSomeOn, bbNoneOn, anyOn); // Everyone is off; get out of here. ctx->SetCurrentBasicBlock(bbNoneOn); ctx->ReturnInst(); // some on: reset the mask to the value it had at function // entry and emit the code. Resetting the mask here is // important, due to the "all on" setting of it for the path // above ctx->SetCurrentBasicBlock(bbSomeOn); ctx->SetFunctionMask(mask); // Set up basic blocks for goto targets again; we want to have // one set of them for gotos in the 'all on' case, and a // distinct set for the 'mixed mask' case. ctx->InitializeLabelMap(code); code->EmitCode(ctx); if (ctx->GetCurrentBasicBlock()) ctx->ReturnInst(); } else { // Set up basic blocks for goto targets ctx->InitializeLabelMap(code); // No check, just emit the code code->EmitCode(ctx); } } if (ctx->GetCurrentBasicBlock()) { // FIXME: We'd like to issue a warning if we've reached the end of // the function without a return statement (for non-void // functions). But the test below isn't right, since we can have // (with 'x' a varying test) "if (x) return a; else return b;", in // which case we have a valid basic block but its unreachable so ok // to not have return statement. #if 0 // If the bblock has no predecessors, then it doesn't matter if it // doesn't have a return; it'll never be reached. If it does, // issue a warning. Also need to warn if it's the entry block for // the function (in which case it will not have predeccesors but is // still reachable.) if (Type::Equal(type->GetReturnType(), AtomicType::Void) == false && (pred_begin(ec.bblock) != pred_end(ec.bblock) || (ec.bblock == entryBBlock))) Warning(sym->pos, "Missing return statement in function returning \"%s\".", type->rType->GetString().c_str()); #endif // FIXME: would like to set the context's current position to // e.g. the end of the function code // if bblock is non-NULL, it hasn't been terminated by e.g. a // return instruction. Need to add a return instruction. ctx->ReturnInst(); } }