示例#1
0
/// Generate GPGPU code for the clast for-loop \p F through a PTXGenerator.
///
/// The function
///   1. queries getScheduleInfo() for the innermost statement, the iteration
///      counts and the tiled / non-parallel loop depths,
///   2. collects the statement's induction variables for all dimensions except
///      the last NonPLoopDepth ones,
///   3. lets the PTXGenerator start generation and moves the builder to the
///      insert point it hands back (LoopBody),
///   4. if NonPLoopDepth is non-zero, emits an explicit loop from the bounds
///      and stride of the inner clast for-loop,
///   5. generates the statement body, rewrites uses of the values returned by
///      getGPUValues() and finally asks the PTXGenerator to finish.
///
/// \param F  The clast for-loop to generate GPGPU code for.
void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) {
  BasicBlock::iterator LoopBody;
  SetVector<Value *> Values;
  SetVector<Value *> IVS;
  std::vector<int> NumIterations;
  PTXGenerator::ValueToValueMapTy VMap;

  assert(!GPUTriple.empty() &&
         "Target triple should be set properly for GPGPU code generation.");
  PTXGenerator PTXGen(Builder, P, GPUTriple);

  // Get original IVS and ScopStmt
  unsigned TiledLoopDepth, NonPLoopDepth;
  const clast_stmt *InnerStmt =
      getScheduleInfo(F, NumIterations, TiledLoopDepth, NonPLoopDepth);
  const clast_stmt *TmpStmt;
  const clast_user_stmt *U;
  // Stays null when the innermost statement is not a for-loop, so that the
  // use further down can be checked instead of reading an indeterminate
  // pointer.
  const clast_for *InnerFor = 0;
  if (CLAST_STMT_IS_A(InnerStmt, stmt_for)) {
    InnerFor = (const clast_for *)InnerStmt;
    TmpStmt = InnerFor->body;
  } else
    TmpStmt = InnerStmt;
  U = (const clast_user_stmt *)TmpStmt;
  ScopStmt *Statement = (ScopStmt *)U->statement->usr;

  // The subtraction below is unsigned; make sure it cannot wrap around.
  assert(NonPLoopDepth <= Statement->getNumIterators() &&
         "More non-parallel loops than statement iterators.");
  const unsigned NumOuterIVs = Statement->getNumIterators() - NonPLoopDepth;
  for (unsigned i = 0; i < NumOuterIVs; i++) {
    const Value *IV = Statement->getInductionVariableForDimension(i);
    IVS.insert(const_cast<Value *>(IV));
  }

  unsigned OutBytes;
  Values = getGPUValues(OutBytes);
  PTXGen.setOutputBytes(OutBytes);
  PTXGen.startGeneration(Values, IVS, VMap, &LoopBody);

  // Remember where we were so that the builder can be restored once the body
  // has been emitted at the position provided by the PTX generator.
  BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
  Builder.SetInsertPoint(LoopBody);

  BasicBlock *AfterBB = 0;
  if (NonPLoopDepth) {
    assert(InnerFor &&
           "A non-parallel loop depth requires an inner clast for-loop.");
    Value *LowerBound, *UpperBound, *IV, *Stride;
    Type *IntPtrTy = getIntPtrTy();
    LowerBound = ExpGen.codegen(InnerFor->LB, IntPtrTy);
    UpperBound = ExpGen.codegen(InnerFor->UB, IntPtrTy);
    Stride = Builder.getInt(APInt_from_MPZ(InnerFor->stride));
    IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB,
                    CmpInst::ICMP_SLE);
    // NOTE(review): the dimension index 2 is hard-coded here; presumably it
    // should be derived from the loop depths -- confirm.
    const Value *OldIV_ = Statement->getInductionVariableForDimension(2);
    Value *OldIV = const_cast<Value *>(OldIV_);
    // Let make_pair deduce its types: spelling out the template arguments
    // with lvalue operands is ill-formed since C++11.
    VMap.insert(std::make_pair(OldIV, IV));
  }

  updateWithValueMap(VMap);

  BlockGenerator::generate(Builder, *Statement, ValueMap, P);

  if (AfterBB)
    Builder.SetInsertPoint(AfterBB->begin());

  // FIXME: The replacement of the host base address with the parameter of ptx
  // subfunction should have been done by updateWithValueMap. We use the
  // following codes to avoid affecting other parts of Polly. This should be
  // fixed later.
  Function *FN = Builder.GetInsertBlock()->getParent();
  for (unsigned j = 0; j < Values.size(); j++) {
    Value *baseAddr = Values[j];
    // The replacement is the same for every instruction; look it up once.
    Value *newAddr = ValueMap[baseAddr];
    for (Function::iterator B = FN->begin(); B != FN->end(); ++B) {
      for (BasicBlock::iterator I = B->begin(); I != B->end(); ++I)
        I->replaceUsesOfWith(baseAddr, newAddr);
    }
  }
  Builder.SetInsertPoint(AfterLoop);

  // Four entries are read below; guard against a shorter vector.
  assert(NumIterations.size() >= 4 &&
         "Expected four iteration counts from getScheduleInfo.");
  PTXGen.setLaunchingParameters(NumIterations[0], NumIterations[1],
                                NumIterations[2], NumIterations[3]);
  PTXGen.finishGeneration(FN);
}
    /// Clone the basic block that contains \p inst and move the clone's
    /// instructions in front of the terminator of \p header.
    ///
    /// In the clone, every PHI node is replaced by its incoming value for
    /// \p header; the PHI that is the clone of \p induction is replaced by
    /// that incoming value incremented by \p offset (via
    /// subscripts::incrementValue). Cloned stores are dropped, all other
    /// non-PHI, non-terminator instructions are moved into \p header.
    ///
    /// \param inst       Instruction inside the loop body to be copied.
    /// \param induction  Induction PHI of the loop in the original body.
    /// \param header     Block receiving the copied instructions; must be an
    ///                   incoming block of the body's PHI nodes.
    /// \param offset     Value added to the induction variable's incoming
    ///                   value; also used to name the cloned instructions.
    /// \return The clone of \p inst, or null if that clone was a store and
    ///         has therefore been erased.
    Value* ModuloSchedulerDriverPass::copyLoopBodyToHeader(Instruction* inst,
            Instruction* induction, BasicBlock* header, int offset){

        // Holds the body of the interesting loop
        BasicBlock *body = inst->getParent();

        assert(header && "Header is null");
        assert(header->getTerminator() && "Header has no terminator");

        // Maps the old instructions to the new Instructions
        typedef DenseMap<const Value *, Value *> CloneMapTy;
        CloneMapTy ValueMap;
        // Do the actual clone
        stringstream iname;
        iname<<"___"<<offset<<"___";
        BasicBlock* newBB = CloneBasicBlock(body, ValueMap, iname.str().c_str());

        // Fixing the dependencies for each of the instructions in the cloned BB
        // They now depend on themselves rather on the old cloned BB.
        for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) {
            for (Instruction::op_iterator ops = it->op_begin(); ops != it->op_end(); ++ops) {
                Value *oldOperand = *ops;
                // Single lookup instead of find() followed by operator[].
                CloneMapTy::iterator mapped = ValueMap.find(oldOperand);
                if (mapped != ValueMap.end()) {
                    it->replaceUsesOfWith(oldOperand, mapped->second);
                }
            }
        }

        // The clone of the induction PHI is the same for every iteration below.
        Value *clonedInduction = ValueMap[induction];

        // Fixing the PHI nodes since they are no longer needed
        for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) {
            if (PHINode *phi = dyn_cast<PHINode>(it)) {
                // Taking the preheader entry from the PHI node.
                // getBasicBlockIndex() yields -1 when header is not an incoming
                // block; indexing with that would be undefined behaviour.
                int headerIdx = phi->getBasicBlockIndex(header);
                assert(headerIdx >= 0 && "PHI has no incoming value from header");

                Value* prevalue = phi->getIncomingValue(headerIdx);
                assert(prevalue && "no prevalue. Don't know what to do");

                // If we are handling a PHI node which is the induction index ? A[PHI(i,0)] ?
                // If so, turn it into A[i + offset]
                if (clonedInduction == phi) {
                    Instruction *add = subscripts::incrementValue(prevalue, offset);
                    //add->insertBefore(phi); This is the same as next line (compiles on LLVM2.1)
                    phi->getParent()->getInstList().insert(phi, add);
                    phi->replaceAllUsesWith(add);
                }  else {
                    // eliminating the PHI node all together
                    // This is just a regular variable or constant. No need to increment
                    // the index.
                    phi->replaceAllUsesWith(prevalue);
                }
            }
        }

        // Look the result up before instructions are erased so that we can
        // avoid handing out a pointer to a deleted instruction.
        Value *result = ValueMap[inst];

        // Move all non PHI and non terminator instructions into the header.
        while (!newBB->getFirstNonPHI()->isTerminator()) {
            Instruction* current = newBB->getFirstNonPHI();
            if (isa<StoreInst>(current)) {
                if (current == result) {
                    // The requested clone is about to be destroyed.
                    result = 0;
                }
                current->eraseFromParent();
            } else {
                current->moveBefore(header->getTerminator());
            }
        }
        // NOTE(review): newBB is never inserted into a function and is not
        // deleted here, so the remaining PHIs and terminator appear to leak --
        // confirm whether the block can be deleted at this point.
        newBB->dropAllReferences();
        return result;
    }