void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) { BasicBlock::iterator LoopBody; SetVector<Value *> Values; SetVector<Value *> IVS; std::vector<int> NumIterations; PTXGenerator::ValueToValueMapTy VMap; assert(!GPUTriple.empty() && "Target triple should be set properly for GPGPU code generation."); PTXGenerator PTXGen(Builder, P, GPUTriple); // Get original IVS and ScopStmt unsigned TiledLoopDepth, NonPLoopDepth; const clast_stmt *InnerStmt = getScheduleInfo(F, NumIterations, TiledLoopDepth, NonPLoopDepth); const clast_stmt *TmpStmt; const clast_user_stmt *U; const clast_for *InnerFor; if (CLAST_STMT_IS_A(InnerStmt, stmt_for)) { InnerFor = (const clast_for *)InnerStmt; TmpStmt = InnerFor->body; } else TmpStmt = InnerStmt; U = (const clast_user_stmt *)TmpStmt; ScopStmt *Statement = (ScopStmt *)U->statement->usr; for (unsigned i = 0; i < Statement->getNumIterators() - NonPLoopDepth; i++) { const Value *IV = Statement->getInductionVariableForDimension(i); IVS.insert(const_cast<Value *>(IV)); } unsigned OutBytes; Values = getGPUValues(OutBytes); PTXGen.setOutputBytes(OutBytes); PTXGen.startGeneration(Values, IVS, VMap, &LoopBody); BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); Builder.SetInsertPoint(LoopBody); BasicBlock *AfterBB = 0; if (NonPLoopDepth) { Value *LowerBound, *UpperBound, *IV, *Stride; Type *IntPtrTy = getIntPtrTy(); LowerBound = ExpGen.codegen(InnerFor->LB, IntPtrTy); UpperBound = ExpGen.codegen(InnerFor->UB, IntPtrTy); Stride = Builder.getInt(APInt_from_MPZ(InnerFor->stride)); IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB, CmpInst::ICMP_SLE); const Value *OldIV_ = Statement->getInductionVariableForDimension(2); Value *OldIV = const_cast<Value *>(OldIV_); VMap.insert(std::make_pair<Value *, Value *>(OldIV, IV)); } updateWithValueMap(VMap); BlockGenerator::generate(Builder, *Statement, ValueMap, P); if (AfterBB) Builder.SetInsertPoint(AfterBB->begin()); // FIXME: The replacement of the host base address with 
the parameter of ptx // subfunction should have been done by updateWithValueMap. We use the // following codes to avoid affecting other parts of Polly. This should be // fixed later. Function *FN = Builder.GetInsertBlock()->getParent(); for (unsigned j = 0; j < Values.size(); j++) { Value *baseAddr = Values[j]; for (Function::iterator B = FN->begin(); B != FN->end(); ++B) { for (BasicBlock::iterator I = B->begin(); I != B->end(); ++I) I->replaceUsesOfWith(baseAddr, ValueMap[baseAddr]); } } Builder.SetInsertPoint(AfterLoop); PTXGen.setLaunchingParameters(NumIterations[0], NumIterations[1], NumIterations[2], NumIterations[3]); PTXGen.finishGeneration(FN); }
/// Clone the basic block that contains \p inst and move the clone's
/// instructions in front of the terminator of \p header.
///
/// Operands of the cloned instructions that refer to instructions of the
/// original block are redirected to their clones. Every PHI node of the clone
/// is replaced by its incoming value from \p header; for the clone of
/// \p induction that value is additionally incremented by \p offset. Cloned
/// store instructions are erased instead of being moved.
///
/// @param inst      An instruction inside the block to clone.
/// @param induction The induction PHI of the original block.
/// @param header    Block that receives the cloned instructions; must be
///                  non-null and have a terminator.
/// @param offset    Amount added to the header value of the induction PHI;
///                  also used to build the name suffix of the clone.
/// @return The clone-map entry for \p inst.
///         NOTE(review): if \p inst is a StoreInst its clone is erased below,
///         so the returned pointer would be dangling -- confirm callers never
///         rely on it in that case.
Value* ModuloSchedulerDriverPass::copyLoopBodyToHeader(Instruction* inst,
        Instruction* induction, BasicBlock* header, int offset){

    // Holds the body of the interesting loop
    BasicBlock *body = inst->getParent();

    assert(header && "Header is null");
    assert(header->getTerminator() && "Header has no terminator");

    // Maps the old instructions to the new Instructions
    DenseMap<const Value *, Value *> ValueMap;

    // Do the actual clone
    stringstream iname;
    iname<<"___"<<offset<<"___";
    BasicBlock* newBB = CloneBasicBlock(body, ValueMap, iname.str().c_str());

    // Fix the dependencies of each instruction in the cloned BB: operands
    // that still point at the original block are redirected to the clones.
    for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) {
        for (Instruction::op_iterator ops = it->op_begin(); ops != it->op_end(); ++ops) {
            // Single lookup: reuse the iterator instead of find + operator[].
            DenseMap<const Value *, Value *>::iterator mapped = ValueMap.find(*ops);
            if (mapped != ValueMap.end()) {
                it->replaceUsesOfWith(*ops, mapped->second);
            }
        }
    }

    // Fix the PHI nodes since they are no longer needed
    for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) {
        if (PHINode *phi = dyn_cast<PHINode>(it)) {
            // Take the entry coming from the header out of the PHI node. A
            // negative index means the PHI has no such incoming edge, which
            // must not be passed on to getIncomingValue.
            const int headerIdx = phi->getBasicBlockIndex(header);
            assert(headerIdx >= 0 && "PHI has no incoming value from header");
            Value* prevalue = phi->getIncomingValue(headerIdx);
            assert(prevalue && "no prevalue. Don't know what to do");

            // If we are handling a PHI node which is the induction index ? A[PHI(i,0)] ?
            // If so, turn it into A[i + offset]
            if (ValueMap[induction] == phi) {
                Instruction *add = subscripts::incrementValue(prevalue, offset);
                // Same effect as add->insertBefore(phi) (compiles on LLVM2.1)
                phi->getParent()->getInstList().insert(phi, add);
                phi->replaceAllUsesWith(add);
            } else {
                // Eliminate the PHI node altogether: this is just a regular
                // variable or constant, no need to increment the index.
                phi->replaceAllUsesWith(prevalue);
            }
        }
    }

    // Move all non PHI and non terminator instructions into the header.
    // The local is deliberately not called `inst` so that it does not shadow
    // the parameter used by the return statement.
    while (!newBB->getFirstNonPHI()->isTerminator()) {
        Instruction* cloned = newBB->getFirstNonPHI();
        if (isa<StoreInst>(cloned)) {
            cloned->eraseFromParent();
        } else {
            cloned->moveBefore(header->getTerminator());
        }
    }

    // NOTE(review): newBB is neither inserted into a function nor deleted
    // here, so it appears to be leaked after its references are dropped --
    // confirm before adding a delete, since the returned clone may still live
    // in it when `inst` is a PHI or a terminator.
    newBB->dropAllReferences();
    return ValueMap[inst];
}