Esempio n. 1
0
void PandaInstrumentVisitor::visitMemCpyInst(MemCpyInst &I){
    Function *F = mod->getFunction("log_dynval");
    if (!F) {
        printf("Instrumentation function not found\n");
        assert(1==0);
    }
    PtrToIntInst *PTII_SRC;
    PtrToIntInst *PTII_DST;
    CallInst *CI_SRC;
    CallInst *CI_DST;
    std::vector<Value*> argValues_src;
    std::vector<Value*> argValues_dst;

    PTII_DST = static_cast<PtrToIntInst*>(IRB.CreatePtrToInt(
        I.getOperand(0), wordType));
    argValues_dst.push_back(ConstantInt::get(ptrType,
        (uintptr_t)dynval_buffer));
    argValues_dst.push_back(ConstantInt::get(intType, ADDRENTRY));
    argValues_dst.push_back(ConstantInt::get(intType, STORE));
    argValues_dst.push_back(static_cast<Value*>(PTII_DST));

    PTII_SRC = static_cast<PtrToIntInst*>(IRB.CreatePtrToInt(
        I.getOperand(1), wordType));
    argValues_src.push_back(ConstantInt::get(ptrType,
        (uintptr_t)dynval_buffer));
    argValues_src.push_back(ConstantInt::get(intType, ADDRENTRY));
    argValues_src.push_back(ConstantInt::get(intType, LOAD));
    argValues_src.push_back(static_cast<Value*>(PTII_SRC));

    //The load must come first in the dynamic log
    CI_SRC = IRB.CreateCall(F, ArrayRef<Value*>(argValues_src));
    CI_SRC->insertBefore(static_cast<Instruction*>(&I));
    PTII_SRC->insertBefore(static_cast<Instruction*>(CI_SRC));

    //The store must come second in the dynamic log
    CI_DST = IRB.CreateCall(F, ArrayRef<Value*>(argValues_dst));
    CI_DST->insertBefore(static_cast<Instruction*>(&I));
    PTII_DST->insertBefore(static_cast<Instruction*>(CI_DST));
}
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
    if (TD == 0) return false;

    // Find out what feeds this byval argument.
    Value *ByValArg = CS.getArgument(ArgNo);
    Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
    uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
    MemDepResult DepInfo =
        MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
                                     true, CS.getInstruction(),
                                     CS.getInstruction()->getParent());
    if (!DepInfo.isClobber())
        return false;

    // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
    // a memcpy, see if we can byval from the source of the memcpy instead of the
    // result.
    MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
    if (MDep == 0 || MDep->isVolatile() ||
            ByValArg->stripPointerCasts() != MDep->getDest())
        return false;

    // The length of the memcpy must be larger or equal to the size of the byval.
    ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
    if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
        return false;

    // Get the alignment of the byval.  If the call doesn't specify the alignment,
    // then it is some target specific value that we can't know.
    unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
    if (ByValAlign == 0) return false;

    // If it is greater than the memcpy, then we check to see if we can force the
    // source of the memcpy to the alignment we need.  If we fail, we bail out.
    if (MDep->getAlignment() < ByValAlign &&
            getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
        return false;

    // Verify that the copied-from memory doesn't change in between the memcpy and
    // the byval call.
    //    memcpy(a <- b)
    //    *b = 42;
    //    foo(*a)
    // It would be invalid to transform the second memcpy into foo(*b).
    //
    // NOTE: This is conservative, it will stop on any read from the source loc,
    // not just the defining memcpy.
    MemDepResult SourceDep =
        MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
                                     false, CS.getInstruction(), MDep->getParent());
    if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
        return false;

    Value *TmpCast = MDep->getSource();
    if (MDep->getSource()->getType() != ByValArg->getType())
        TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
                                  "tmpcast", CS.getInstruction());

    DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
          << "  " << *MDep << "\n"
          << "  " << *CS.getInstruction() << "\n");

    // Otherwise we're good!  Update the byval argument.
    CS.setArgument(ArgNo, TmpCast);
    ++NumMemCpyInstr;
    return true;
}
Esempio n. 3
0
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
                       MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count",
           &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type",
           &I);

    // Check argument types (in case the callee was casted) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = &*PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type",
               &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the dereferenced
        // memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != MustAlias && Result != PartialAlias,
                     "Unusual: noalias argument aliases another argument", &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty =
            cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
                               DL->getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca",
             &I);
    }


  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
                           MSI->getAlignment(), nullptr, MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      Assert(I.getParent()->getParent()->isVarArg(),
             "Undefined behavior: va_start called in a non-varargs function",
             &I);

      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Write);
      visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;
    }
}
Esempio n. 4
0
/// processMemCpy - perform simplication of memcpy's.  If we have memcpy A which
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
///  This allows later passes to remove the first memcpy altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();

  // The are two possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundance for DSE.
  //   b) call-memcpy xform for return slot optimization.
  MemDepResult dep = MD.getDependency(M);
  if (!dep.isClobber())
    return false;
  if (!isa<MemCpyInst>(dep.getInst())) {
    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
      return performCallSlotOptzn(M, C);
    return false;
  }
  
  MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
  
  // We can only transforms memcpy's where the dest of one is the source of the
  // other
  if (M->getSource() != MDep->getDest())
    return false;
  
  // Second, the length of the memcpy's must be the same, or the preceeding one
  // must be larger than the following one.
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
  if (!C1 || !C2)
    return false;
  
  uint64_t DepSize = C1->getValue().getZExtValue();
  uint64_t CpySize = C2->getValue().getZExtValue();
  
  if (DepSize < CpySize)
    return false;
  
  // Finally, we have to make sure that the dest of the second does not
  // alias the source of the first
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
      AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
           AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
           != AliasAnalysis::NoAlias)
    return false;
  
  // If all checks passed, then we can transform these memcpy's
  const Type *ArgTys[3] = { M->getRawDest()->getType(),
                            MDep->getRawSource()->getType(),
                            M->getLength()->getType() };
  Function *MemCpyFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
                                 M->getIntrinsicID(), ArgTys, 3);
    
  Value *Args[5] = {
    M->getRawDest(), MDep->getRawSource(), M->getLength(),
    M->getAlignmentCst(), M->getVolatileCst()
  };
  
  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
  
  
  // If C and M don't interfere, then this is a valid transformation.  If they
  // did, this would mean that the two sources overlap, which would be bad.
  if (MD.getDependency(C) == dep) {
    MD.removeInstruction(M);
    M->eraseFromParent();
    ++NumMemCpyInstr;
    return true;
  }
  
  // Otherwise, there was no point in doing this, so we remove the call we
  // inserted and act like nothing happened.
  MD.removeInstruction(C);
  C->eraseFromParent();
  return false;
}
Esempio n. 5
0
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  int AllocaSize =
      WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // FIXME: What is this for? It doesn't make sense to promote arbitrary
      // function calls. If the call is to a defined function that can also be
      // promoted, we should be able to do this once that function is also
      // rewritten.

      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
                                ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }
      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
                                             NewType, F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
Esempio n. 6
0
// FIXME: Should try to pick the most likely to be profitable allocas first.
bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return false;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I, AS))
    return true; // Promoted to vector.

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST =
    TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // could could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << "  " << AllocSize
          << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AS.LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want to
      // touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
                           MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
                           MemCpy->getLength(), MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
                            MemMove->getRawSource(), MemMove->getSourceAlignment(),
                            MemMove->getLength(), MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getDestAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
      );

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  return true;
}
void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  // FIXME: This is the maximum work group size.  We should try to get
  // value from the reqd_work_group_size function attribute if it is
  // available.
  unsigned WorkGroupSize = 256;
  int AllocaSize = WorkGroupSize *
      Mod->getDataLayout()->getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  GlobalVariable *GV = new GlobalVariable(
      *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
      GlobalValue::ExternalLinkage, 0, I.getName(), 0,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);

  FunctionType *FTy = FunctionType::get(
      Type::getInt32Ty(Mod->getContext()), false);
  AttributeSet AttrSet;
  AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);

  Value *ReadLocalSizeY = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.y", FTy, AttrSet);
  Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.z", FTy, AttrSet);
  Value *ReadTIDIGX = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.x", FTy, AttrSet);
  Value *ReadTIDIGY = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.y", FTy, AttrSet);
  Value *ReadTIDIGZ = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.z", FTy, AttrSet);


  Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
  Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
  Value *TIdX  = Builder.CreateCall(ReadTIDIGX);
  Value *TIdY  = Builder.CreateCall(ReadTIDIGY);
  Value *TIdZ  = Builder.CreateCall(ReadTIDIGZ);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  std::vector<Value*> Indices;
  Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
  Indices.push_back(TID);

  Value *Offset = Builder.CreateGEP(GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (std::vector<Value*>::iterator i = WorkList.begin(),
                                     e = WorkList.end(); i != e; ++i) {
    Value *V = *i;
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
                                ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }
      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
                                             F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
Esempio n. 8
0
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  // TODO: Check function alignment?
  visitMemoryReference(I, Callee, 0, 0);

  if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
    Assert1(CS.getCallingConv() == F->getCallingConv(),
            "Undefined behavior: Caller and callee calling convention differ",
            &I);

    const FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());

    Assert1(FT->isVarArg() ?
              FT->getNumParams() <= NumActualArgs :
              FT->getNumParams() == NumActualArgs,
            "Undefined behavior: Call argument count mismatches callee "
            "argument count", &I);
      
    // TODO: Check argument types (in case the callee was casted)

    // TODO: Check ABI-significant attributes.

    // TODO: Check noalias attribute.

    // TODO: Check sret attribute.
  }

  // TODO: Check the "tail" keyword constraints.

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0);
      visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      unsigned Size = 0;
      if (const ConstantInt *Len =
            dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts()))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
              AliasAnalysis::MustAlias,
              "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0);
      visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0);
      break;
    }

    case Intrinsic::vastart:
      Assert1(I.getParent()->getParent()->isVarArg(),
              "Undefined behavior: va_start called in a non-varargs function",
              &I);

      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      visitMemoryReference(I, CS.getArgument(1), 0, 0);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;

    case Intrinsic::stackrestore:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    }
}