/// If the argument is a GEP, then returns the operand identified by /// getGEPInductionOperand. However, if there is some other non-loop-invariant /// operand, it returns that instead. Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); if (!GEP) return Ptr; unsigned InductionOperand = getGEPInductionOperand(GEP); // Check that all of the gep indices are uniform except for our induction // operand. for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) if (i != InductionOperand && !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) return Ptr; return GEP->getOperand(InductionOperand); }
// FIXME: Merge with llvm::isConsecutiveAccess bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { Value *PtrA = getPointerOperand(A); Value *PtrB = getPointerOperand(B); unsigned ASA = getPointerAddressSpace(A); unsigned ASB = getPointerAddressSpace(B); // Check that the address spaces match and that the pointers are valid. if (!PtrA || !PtrB || (ASA != ASB)) return false; // Make sure that A and B are different pointers of the same size type. unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); Type *PtrATy = PtrA->getType()->getPointerElementType(); Type *PtrBTy = PtrB->getType()->getPointerElementType(); if (PtrA == PtrB || DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) || DL.getTypeStoreSize(PtrATy->getScalarType()) != DL.getTypeStoreSize(PtrBTy->getScalarType())) return false; APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy)); unsigned IdxWidth = DL.getIndexSizeInBits(ASA); APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); APInt OffsetDelta = OffsetB - OffsetA; // Check if they are based on the same pointer. That makes the offsets // sufficient. if (PtrA == PtrB) return OffsetDelta == Size; // Compute the necessary base pointer delta to have the necessary final delta // equal to the size. APInt BaseDelta = Size - OffsetDelta; // Compute the distance with SCEV between the base pointers. const SCEV *PtrSCEVA = SE.getSCEV(PtrA); const SCEV *PtrSCEVB = SE.getSCEV(PtrB); const SCEV *C = SE.getConstant(BaseDelta); const SCEV *X = SE.getAddExpr(PtrSCEVA, C); if (X == PtrSCEVB) return true; // Sometimes even this doesn't work, because SCEV can't always see through // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking // things the hard way. // Look through GEPs after checking they're the same except for the last // index. 
GetElementPtrInst *GEPA = getSourceGEP(A); GetElementPtrInst *GEPB = getSourceGEP(B); if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands()) return false; unsigned FinalIndex = GEPA->getNumOperands() - 1; for (unsigned i = 0; i < FinalIndex; i++) if (GEPA->getOperand(i) != GEPB->getOperand(i)) return false; Instruction *OpA = dyn_cast<Instruction>(GEPA->getOperand(FinalIndex)); Instruction *OpB = dyn_cast<Instruction>(GEPB->getOperand(FinalIndex)); if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() || OpA->getType() != OpB->getType()) return false; // Only look through a ZExt/SExt. if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA)) return false; bool Signed = isa<SExtInst>(OpA); OpA = dyn_cast<Instruction>(OpA->getOperand(0)); OpB = dyn_cast<Instruction>(OpB->getOperand(0)); if (!OpA || !OpB || OpA->getType() != OpB->getType()) return false; // Now we need to prove that adding 1 to OpA won't overflow. bool Safe = false; // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA, // we're okay. if (OpB->getOpcode() == Instruction::Add && isa<ConstantInt>(OpB->getOperand(1)) && cast<ConstantInt>(OpB->getOperand(1))->getSExtValue() > 0) { if (Signed) Safe = cast<BinaryOperator>(OpB)->hasNoSignedWrap(); else Safe = cast<BinaryOperator>(OpB)->hasNoUnsignedWrap(); } unsigned BitWidth = OpA->getType()->getScalarSizeInBits(); // Second attempt: // If any bits are known to be zero other than the sign bit in OpA, we can // add 1 to it while guaranteeing no overflow of any sort. if (!Safe) { KnownBits Known(BitWidth); computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT); if (Known.countMaxTrailingOnes() < (BitWidth - 1)) Safe = true; } if (!Safe) return false; const SCEV *OffsetSCEVA = SE.getSCEV(OpA); const SCEV *OffsetSCEVB = SE.getSCEV(OpB); const SCEV *One = SE.getConstant(APInt(BitWidth, 1)); const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One); return X2 == OffsetSCEVB; }
// // Method: runOnModule() // // Description: // Entry point for this LLVM pass. // Find all GEPs, and simplify them. // // Inputs: // M - A reference to the LLVM module to transform // // Outputs: // M - The transformed LLVM module. // // Return value: // true - The module was modified. // false - The module was not modified. // bool SimplifyGEP::runOnModule(Module& M) { TD = &getAnalysis<TargetData>(); preprocess(M); for (Module::iterator F = M.begin(); F != M.end(); ++F){ for (Function::iterator B = F->begin(), FE = F->end(); B != FE; ++B) { for (BasicBlock::iterator I = B->begin(), BE = B->end(); I != BE; I++) { if(!(isa<GetElementPtrInst>(I))) continue; GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); Value *PtrOp = GEP->getOperand(0); Value *StrippedPtr = PtrOp->stripPointerCasts(); // Check if the GEP base pointer is enclosed in a cast if (StrippedPtr != PtrOp) { const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); bool HasZeroPointerIndex = false; if (ConstantInt *C = dyn_cast<ConstantInt>(GEP->getOperand(1))) HasZeroPointerIndex = C->isZero(); // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... // into : GEP i8* X, ... // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); if (const ArrayType *CATy = dyn_cast<ArrayType>(CPTy->getElementType())) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... 
SmallVector<Value*, 8> Idx(GEP->idx_begin()+1, GEP->idx_end()); GetElementPtrInst *Res = GetElementPtrInst::Create(StrippedPtr, Idx, GEP->getName(), GEP); Res->setIsInBounds(GEP->isInBounds()); GEP->replaceAllUsesWith(Res); continue; } if (const ArrayType *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { // -> GEP [10 x i8]* X, i32 0, ... // At this point, we know that the cast source type is a pointer // to an array of the same type as the destination pointer // array. Because the array type is never stepped over (there // is a leading zero) we can fold the cast into this GEP. GEP->setOperand(0, StrippedPtr); continue; } } } } else if (GEP->getNumOperands() == 2) { // Transform things like: // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast Type *SrcElTy = StrippedPtrTy->getElementType(); Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP->getContext())); Idx[1] = GEP->getOperand(1); Value *NewGEP = GetElementPtrInst::Create(StrippedPtr, Idx, GEP->getName(), GEP); // V and GEP are both pointer types --> BitCast GEP->replaceAllUsesWith(new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP)); continue; } // Transform things like: // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. 
We // allow either a mul, shift, or constant here. Value *NewIdx = 0; ConstantInt *Scale = 0; if (ArrayEltSize == 1) { NewIdx = GEP->getOperand(1); Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(1))) { NewIdx = ConstantInt::get(CI->getType(), 1); Scale = CI; } else if (Instruction *Inst =dyn_cast<Instruction>(GEP->getOperand(1))){ if (Inst->getOpcode() == Instruction::Shl && isa<ConstantInt>(Inst->getOperand(1))) { ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); uint32_t ShAmtVal = ShAmt->getLimitedValue(64); Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), 1ULL << ShAmtVal); NewIdx = Inst->getOperand(0); } else if (Inst->getOpcode() == Instruction::Mul && isa<ConstantInt>(Inst->getOperand(1))) { Scale = cast<ConstantInt>(Inst->getOperand(1)); NewIdx = Inst->getOperand(0); } } // If the index will be to exactly the right offset with the scale taken // out, perform the transformation. Note, we don't know whether Scale is // signed or not. We'll use unsigned version of division/modulo // operation after making sure Scale doesn't have the sign bit set. if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && Scale->getZExtValue() % ArrayEltSize == 0) { Scale = ConstantInt::get(Scale->getType(), Scale->getZExtValue() / ArrayEltSize); if (Scale->getZExtValue() != 1) { Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), false /*ZExt*/); NewIdx = BinaryOperator::Create(BinaryOperator::Mul, NewIdx, C, "idxscale"); } // Insert the new GEP instruction. Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP->getContext())); Idx[1] = NewIdx; Value *NewGEP = GetElementPtrInst::Create(StrippedPtr, Idx, GEP->getName(), GEP); GEP->replaceAllUsesWith(new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP)); continue; } } } } } } } return true; }
// Build (and memoize in `names`) a textual name for `value`.
//
// The name is assembled by starting at `value` and following the `parents`
// map from one value to the next, appending a fragment for each value
// visited. The walk stops when `parents` yields a null entry, or early after
// a GEP, a call or an integer compare, whose operands are rendered by
// recursive calls. The finished string is stored in `names[value]` and
// returned; a value already present in `names` returns its stored string.
//
// Note that `parents[...]` is accessed with operator[], exactly as before.
string esp::parseName(Value *value){
  // has existed
  if(names.find(value) != names.end())
    return names[value];

  string name = "";
  Value *current = value;

  do {
    if (isa<LoadInst > (current)) {
      name += "*";
      // Only a load at the end of the parent chain spells out the name of
      // the pointer it reads from.
      if (parents[current] == NULL)
        name += (((LoadInst*) current)->getOperand(0))->getNameStr();
      if (((LoadInst*) current)->isVolatile())
        name += std::string("@VolatileLoad");
    } else if (dyn_cast<GetElementPtrInst > (current)) {
      GetElementPtrInst * gep = dyn_cast<GetElementPtrInst > (current);
      unsigned ops = gep->getNumOperands();
      name += "[";
      for (unsigned i = 1; i < ops; i++) {
        Value *v = gep->getOperand(i);
        // Test the index operand itself (not the GEP) for being a constant,
        // the same way the GEPOperator branch below does.
        if (dyn_cast<ConstantInt > (v)) {
          ConstantInt * ci = dyn_cast<ConstantInt > (v);
          // A leading zero index is not shown.
          if (i == 1 && ci->equalsInt(0))
            continue;
          name += ".";
          name += ci->getValue().toString(10, false);
        } else {
          name += ".";
          name += parseName(v);
        }
      }
      name += "]";
      name += parseName(gep->getOperand(0));
      break;
    } else if (isa<AllocaInst > (current)) {
      name += current->getNameStr();
    } else if (isa<Argument > (current)) {
      // Only arguments registered in `arguments` contribute to the name.
      if (arguments.find(current) != arguments.end())
        name += std::string("$") + current->getNameStr();
    } else if (isa<GlobalValue > (current)) {
      name += std::string("@") + current->getNameStr();
    } else if (isa<CallInst > (current)) {
      CallInst *callinst = (CallInst*) current;
      if (callinst->getCalledFunction() != NULL) {
        name += std::string("@") + callinst->getCalledFunction()->getNameStr() + "(";
      } else {
        name += std::string("@[funcPTR](");
        name += callinst->getCalledValue()->getNameStr();
      }
      // Operands are rendered starting at index 1.
      for (unsigned i = 1; i < callinst->getNumOperands(); i++) {
        name += parseName(callinst->getOperand(i));
      }
      name += std::string(")");
      break;
    } else if (isa<CastInst > (current)) {
      // Casts add nothing to the name.
    } else if (isa<PHINode > (current)) {
      // PHI nodes add nothing to the name.
    } else if (isa<BinaryOperator > (current)) {
      BinaryOperator *bo = dyn_cast<BinaryOperator > (current);
      Instruction::BinaryOps opcode = bo->getOpcode();
      if (opcode == Instruction::Add) {
        name.append("+");
      } else if (opcode == Instruction::Sub) {
        name.append("-");
      } else if (opcode == Instruction::Or) {
        name.append("||");
      } else if (opcode == Instruction::Mul) {
        name.append("*");
      } else if (opcode == Instruction::Xor) {
        name.append("^");
      } else if (opcode == Instruction::And) {
        name.append("&&");
      } else if (opcode == Instruction::Shl) {
        name.append("<<");
      } else if (opcode == Instruction::AShr) {
        name.append(">>");
      } else if (opcode == Instruction::LShr) {
        name.append(">>>");
      }
      // Only a constant operand is printed; operand 0 wins if both are
      // constants.
      Value *v0 = bo->getOperand(0);
      Value *v1 = bo->getOperand(1);
      if (isa<ConstantInt > (v0)) {
        name += ((ConstantInt*) v0)->getValue().toString(10, false);
      } else if (isa<ConstantInt > (v1)) {
        name += ((ConstantInt*) v1)->getValue().toString(10, false);
      } else {
        printDebugMsg("Binary Operation between non-constants\n");
      }
    } else if (dyn_cast<GEPOperator > (current)) {
      GEPOperator * gep = dyn_cast<GEPOperator > (current);
      unsigned ops = gep->getNumOperands();
      name += "[";
      // Non-constant indices are not printed in this branch.
      for (unsigned i = 1; i < ops; i++) {
        Value *v = gep->getOperand(i);
        if (dyn_cast<ConstantInt > (v)) {
          ConstantInt * ci = dyn_cast<ConstantInt > (v);
          if (i == 1 && ci->equalsInt(0))
            continue;
          name += ".";
          name += ci->getValue().toString(10, false);
        }
      }
      name += "]";
      name += parseName(gep->getOperand(0));
      break;
    } else if (dyn_cast<ICmpInst > (current)) {
      ICmpInst * icmp = dyn_cast<ICmpInst > (current);
      // Name the compare after operand 1 when operand 0 is a constant,
      // otherwise after operand 0.
      if (isa<Constant > (icmp->getOperand(0))) {
        name += parseName(icmp->getOperand(1));
        break;
      } else {
        name += parseName(icmp->getOperand(0));
        break;
      }
    } else if (dyn_cast<ConstantInt > (current)) {
      ConstantInt * cint = dyn_cast<ConstantInt > (current);
      name += cint->getValue().toString(10, true);
    } else {
      name += current->getNameStr(); // might not work
    }
  } while ((current = parents[current]));

  names[value] = name;
  return name;
}
// Obfuscate constant array indexing inside F.
//
// Every getelementptr whose base operand is a pointer to an array and whose
// last index is a ConstantInt is replaced by a chain of instructions that
// reaches the same address indirectly:
//   - a clone of the GEP with a random last index,
//   - 100 randomly chosen steps (random single-index GEP, spill/reload of the
//     pointer through a fresh alloca, or an integer add/sub round trip of the
//     pointer that is stored to and reloaded from a fresh global),
//   - a final single-index GEP that applies whatever offset is still owed.
// All uses of the original GEP are redirected to the final GEP and the
// original is erased. GEPs that do not match are left untouched.
//
// NOTE(review): the last index is assumed to index array elements; a GEP
// whose last constant index selects a struct field would also match the
// checks below - confirm whether such GEPs can reach this function.
void ArrayObfs::ArrObfuscate ( Function *F )
{
  // Iterate the whole Function
  Function *f = F;
  for ( Function::iterator bb = f->begin(); bb != f->end(); ++bb )
  {
    for ( BasicBlock::iterator inst = bb->begin(); inst != bb->end(); )
    {
      Instruction *cur = &*inst;

      // Only GEPs are candidates (checked by type instead of a hard-coded
      // opcode number).
      GetElementPtrInst *Ary = dyn_cast<GetElementPtrInst>( cur );
      if ( !Ary )
      {
        ++inst;
        continue;
      }

      Value *basePtr = Ary->getOperand( 0 );
      unsigned lastOprand = Ary->getNumOperands() - 1;

      // Check Type Array : the base must be a pointer to an array
      PointerType *ptrType = dyn_cast<PointerType>( basePtr->getType() );
      if ( !ptrType || !ptrType->getElementType()->isArrayTy() )
      {
        ++inst;
        continue;
      }

      // Skip if Index is a Variable
      ConstantInt *constIdx = dyn_cast<ConstantInt>( Ary->getOperand( lastOprand ) );
      if ( !constIdx )
      {
        ++inst;
        continue;
      }

      //////////////////////////////////////////////////////////////////////////////
      // Do Real Stuff
      Type *idxType = constIdx->getType();
      // Index distance still owed once the random steps have been applied.
      APInt offset = constIdx->getValue();
      Value *prevPtr = basePtr;

      // Prelog : Clone the Original Inst with a random last index
      uint64_t firstIdx = cryptoutils->get_uint64_t() & 0xffff;
      GetElementPtrInst *head = cast<GetElementPtrInst>( Ary->clone() );
      head->setOperand( lastOprand, ConstantInt::get( idxType, firstIdx ) );
      head->setOperand( 0, prevPtr );
      // The random index may lie outside the array, so the clone must not
      // keep an inbounds flag inherited from the original.
      head->setIsInBounds( false );
      head->insertBefore( cur );
      prevPtr = head;
      offset = offset - firstIdx;

      // Create a Global Variable to Avoid Optimization
      Module *M = f->getParent();
      // prevPtr has pointer type, so its zero value is the null pointer
      // constant rather than a ConstantInt.
      Constant *initGV = Constant::getNullValue( prevPtr->getType() );
      GlobalVariable *gv = new GlobalVariable( *M, prevPtr->getType(), false,
                                               GlobalValue::CommonLinkage, initGV );

      // Enter a Loop to Perform Random Obfuscation
      unsigned cnt = 100;
      while ( cnt-- )
      {
        // Iteratively Generate Obfuscated Code
        switch( cryptoutils->get_uint64_t() & 7 )
        {
          // Random Indexing Obfuscation
          case 0 :
          case 1 :
          case 2 :
          {
            uint64_t stepIdx = cryptoutils->get_uint64_t() & 0xffff;
            Value *stepOprand = ConstantInt::get( idxType, stepIdx );
            prevPtr = GetElementPtrInst::Create( prevPtr, stepOprand, "", cur );
            offset = offset - stepIdx;
            break;
          }

          // Ptr Dereference : spill the pointer to a stack slot and reload it
          case 3 :
          case 4 :
          {
            Value *ONE = ConstantInt::get( Type::getInt32Ty( M->getContext() ), 1 );
            Value *slot = new AllocaInst( prevPtr->getType(), ONE, "", cur );
            new StoreInst( prevPtr, slot, cur );
            prevPtr = new LoadInst( slot, "", cur );
            break;
          }

          // Ptr Value Transform : add and subtract the same random amount on
          // the integer form of the pointer, then pass the result through
          // the global. Integer arithmetic is used because floating-point
          // add/sub is not defined on pointer operands. i64 is wide enough
          // for the round trip on targets with pointers of at most 64 bits.
          case 5 :
          case 6 :
          case 7 :
          {
            uint64_t RandNum = cryptoutils->get_uint64_t();
            Type *wideInt = Type::getInt64Ty( M->getContext() );
            Value *ObfsVal = ConstantInt::get( wideInt, RandNum );
            Value *asInt = new PtrToIntInst( prevPtr, wideInt, "", cur );
            Value *added = BinaryOperator::Create( Instruction::Add, asInt, ObfsVal, "", cur );
            Value *restored = BinaryOperator::Create( Instruction::Sub, added, ObfsVal, "", cur );
            Value *asPtr = new IntToPtrInst( restored, prevPtr->getType(), "", cur );
            new StoreInst( asPtr, gv, cur );
            prevPtr = new LoadInst( gv, "", cur );
            break;
          }
        }
      }

      // Postlog : Fix the Original Indexing
      Value *fixOprand = ConstantInt::get( idxType, offset );
      GetElementPtrInst *tail = GetElementPtrInst::Create( prevPtr, fixOprand, "", cur );

      // Fix the Relationship
      cur->replaceAllUsesWith( tail );

      // Finally : advance past the original GEP, then delete it. It has no
      // uses left, and erasing (rather than only unlinking) frees it.
      ++inst;
      cur->eraseFromParent();
    }
  }
  ++ArrayMod;
}
// ConvertExpressionToType - Produce a value of type Ty that stands for the
// expression V.
//
//  V   - The expression to convert. Returned unchanged if it already has
//        type Ty.
//  Ty  - The requested type.
//  VMC - Conversion cache. ExprMap remembers the replacement created for each
//        converted instruction and is consulted first; NewCasts records the
//        casts created here.
//  TD  - Target data, forwarded to the helpers and to recursive calls.
//
// Constants are converted with ConstantExpr::getCast. For an instruction a
// replacement is built according to its opcode, registered in VMC.ExprMap
// (eagerly for the opcodes that recurse into their operands), inserted in
// front of the original instruction, and then every user of the original is
// handed to ConvertOperandToType. Returns the replacement value.
//
Value *llvm::ConvertExpressionToType(Value *V, const Type *Ty,
                                     ValueMapCache &VMC, const TargetData &TD) {
  if (V->getType() == Ty) return V;  // Already where we need to be?

  ValueMapCache::ExprMapTy::iterator VMCI = VMC.ExprMap.find(V);
  if (VMCI != VMC.ExprMap.end()) {
    // Read the cached replacement before the temporary handle below runs:
    // the handle may remove I, so the map entry is not touched afterwards.
    Value *Cached = VMCI->second;
    assert(Cached->getType() == Ty);

    if (Instruction *I = dyn_cast<Instruction>(V))
      ValueHandle IHandle(VMC, I);  // Remove I if it is unused now!

    return Cached;
  }

  DEBUG(std::cerr << "CETT: " << (void*)V << " " << *V);

  Instruction *I = dyn_cast<Instruction>(V);
  if (I == 0) {
    Constant *CPV = cast<Constant>(V);
    // Constants are converted by constant folding the cast that is required.
    // We assume here that all casts are implemented for constant prop.
    Value *Result = ConstantExpr::getCast(CPV, Ty);
    // Add the instruction to the expression map
    //VMC.ExprMap[V] = Result;
    return Result;
  }

  BasicBlock *BB = I->getParent();
  // The replacement takes over the name of the original instruction.
  std::string Name = I->getName();  if (!Name.empty()) I->setName("");
  Instruction *Res;     // Result of conversion

  ValueHandle IHandle(VMC, I);  // Prevent I from being removed!

  // Placeholder operand of the right type, used until the real (converted)
  // operands are known.
  Constant *Dummy = Constant::getNullValue(Ty);

  switch (I->getOpcode()) {
  case Instruction::Cast:
    assert(VMC.NewCasts.count(ValueHandle(VMC, I)) == 0);
    Res = new CastInst(I->getOperand(0), Ty, Name);
    VMC.NewCasts.insert(ValueHandle(VMC, Res));
    break;

  case Instruction::Add:
  case Instruction::Sub:
    Res = BinaryOperator::create(cast<BinaryOperator>(I)->getOpcode(),
                                 Dummy, Dummy, Name);
    VMC.ExprMap[I] = Res;   // Add node to expression eagerly

    Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), Ty, VMC, TD));
    Res->setOperand(1, ConvertExpressionToType(I->getOperand(1), Ty, VMC, TD));
    break;

  case Instruction::Shl:
  case Instruction::Shr:
    // Only the shifted value is converted; the shift amount is reused as is.
    Res = new ShiftInst(cast<ShiftInst>(I)->getOpcode(), Dummy,
                        I->getOperand(1), Name);
    VMC.ExprMap[I] = Res;
    Res->setOperand(0, ConvertExpressionToType(I->getOperand(0), Ty, VMC, TD));
    break;

  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(I);

    Res = new LoadInst(Constant::getNullValue(PointerType::get(Ty)), Name);
    VMC.ExprMap[I] = Res;
    Res->setOperand(0, ConvertExpressionToType(LI->getPointerOperand(),
                                               PointerType::get(Ty), VMC, TD));
    assert(Res->getOperand(0)->getType() == PointerType::get(Ty));
    assert(Ty == Res->getType());
    assert(Res->getType()->isFirstClassType() && "Load of structure or array!");
    break;
  }

  case Instruction::PHI: {
    PHINode *OldPN = cast<PHINode>(I);
    PHINode *NewPN = new PHINode(Ty, Name);

    VMC.ExprMap[I] = NewPN;   // Add node to expression eagerly
    // Move the incoming values over one at a time, converting each.
    while (OldPN->getNumOperands()) {
      BasicBlock *BB = OldPN->getIncomingBlock(0);
      Value *OldVal = OldPN->getIncomingValue(0);
      ValueHandle OldValHandle(VMC, OldVal);
      OldPN->removeIncomingValue(BB, false);
      Value *V = ConvertExpressionToType(OldVal, Ty, VMC, TD);
      NewPN->addIncoming(V, BB);
    }
    Res = NewPN;
    break;
  }

  case Instruction::Malloc: {
    Res = ConvertMallocToType(cast<MallocInst>(I), Ty, Name, VMC, TD);
    break;
  }

  case Instruction::GetElementPtr: {
    // GetElementPtr's are directly convertible to a pointer type if they have
    // a number of zeros at the end.  Because removing these values does not
    // change the logical offset of the GEP, it is okay and fair to remove them.
    // This can change this:
    //   %t1 = getelementptr %Hosp * %hosp, ubyte 4, ubyte 0  ; <%List **>
    //   %t2 = cast %List * * %t1 to %List *
    // into
    //   %t2 = getelementptr %Hosp * %hosp, ubyte 4           ; <%List *>
    //
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);

    // Check to see if there are zero elements that we can remove from the
    // index array.  If there are, check to see if removing them causes us to
    // get to the right type...
    //
    std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end());
    const Type *BaseType = GEP->getPointerOperand()->getType();
    const Type *PVTy = cast<PointerType>(Ty)->getElementType();
    Res = 0;
    while (!Indices.empty() &&
           Indices.back() == Constant::getNullValue(Indices.back()->getType())){
      Indices.pop_back();
      if (GetElementPtrInst::getIndexedType(BaseType, Indices, true) == PVTy) {
        if (Indices.size() == 0)
          Res = new CastInst(GEP->getPointerOperand(), BaseType); // NOOP CAST
        else
          Res = new GetElementPtrInst(GEP->getPointerOperand(), Indices, Name);
        break;
      }
    }

    if (Res == 0 && GEP->getNumOperands() == 2 &&
        GEP->getType() == PointerType::get(Type::SByteTy)) {

      // Otherwise, we can convert a GEP from one form to the other iff the
      // current gep is of the form 'getelementptr sbyte*, unsigned N
      // and we could convert this to an appropriate GEP for the new type.
      //
      const PointerType *NewSrcTy = PointerType::get(PVTy);
      BasicBlock::iterator It = I;

      // Check to see if 'N' is an expression that can be converted to
      // the appropriate size... if so, allow it.
      //
      std::vector<Value*> Indices;
      const Type *ElTy = ConvertibleToGEP(NewSrcTy, I->getOperand(1),
                                          Indices, TD, &It);
      if (ElTy) {
        assert(ElTy == PVTy && "Internal error, setup wrong!");
        Res = new GetElementPtrInst(Constant::getNullValue(NewSrcTy),
                                    Indices, Name);
        VMC.ExprMap[I] = Res;
        Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),
                                                   NewSrcTy, VMC, TD));
      }
    }

    // Otherwise, it could be that we have something like this:
    //     getelementptr [[sbyte] *] * %reg115, uint %reg138    ; [sbyte]**
    // and want to convert it into something like this:
    //     getelemenptr [[int] *] * %reg115, uint %reg138      ; [int]**
    //
    if (Res == 0) {
      const PointerType *NewSrcTy = PointerType::get(PVTy);
      std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end());
      Res = new GetElementPtrInst(Constant::getNullValue(NewSrcTy),
                                  Indices, Name);
      VMC.ExprMap[I] = Res;
      Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),
                                                 NewSrcTy, VMC, TD));
    }

    assert(Res && "Didn't find match!");
    break;
  }

  case Instruction::Call: {
    assert(!isa<Function>(I->getOperand(0)));

    // If this is a function pointer, we can convert the return type if we can
    // convert the source function pointer.
    //
    const PointerType *PT = cast<PointerType>(I->getOperand(0)->getType());
    const FunctionType *FT = cast<FunctionType>(PT->getElementType());
    std::vector<const Type *> ArgTys(FT->param_begin(), FT->param_end());
    const FunctionType *NewTy =
      FunctionType::get(Ty, ArgTys, FT->isVarArg());
    const PointerType *NewPTy = PointerType::get(NewTy);
    if (Ty == Type::VoidTy)
      Name = "";  // Make sure not to name calls that now return void!

    Res = new CallInst(Constant::getNullValue(NewPTy),
                       std::vector<Value*>(I->op_begin()+1, I->op_end()),
                       Name);
    VMC.ExprMap[I] = Res;
    Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),NewPTy,VMC,TD));
    break;
  }
  default:
    assert(0 && "Expression convertible, but don't know how to convert?");
    return 0;
  }

  assert(Res->getType() == Ty && "Didn't convert expr to correct type!");

  BB->getInstList().insert(I, Res);

  // Add the instruction to the expression map
  VMC.ExprMap[I] = Res;

  // Convert the users of I. A conversion may drop uses of I, so the use
  // count is re-read after every step and the position only advances when
  // the count did not change.
  unsigned NumUses = I->use_size();
  for (unsigned It = 0; It < NumUses; ) {
    unsigned OldSize = NumUses;
    Value::use_iterator UI = I->use_begin();
    std::advance(UI, It);
    ConvertOperandToType(*UI, I, Res, VMC, TD);
    NumUses = I->use_size();
    if (NumUses == OldSize) ++It;
  }

  DEBUG(std::cerr << "ExpIn: " << (void*)I << " " << *I
                  << "ExpOut: " << (void*)Res << " " << *Res);

  return Res;
}
// ExpressionConvertibleToType - Return true if it is possible bool llvm::ExpressionConvertibleToType(Value *V, const Type *Ty, ValueTypeCache &CTMap, const TargetData &TD) { // Expression type must be holdable in a register. if (!Ty->isFirstClassType()) return false; ValueTypeCache::iterator CTMI = CTMap.find(V); if (CTMI != CTMap.end()) return CTMI->second == Ty; // If it's a constant... all constants can be converted to a different // type. // if (isa<Constant>(V) && !isa<GlobalValue>(V)) return true; CTMap[V] = Ty; if (V->getType() == Ty) return true; // Expression already correct type! Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return false; // Otherwise, we can't convert! switch (I->getOpcode()) { case Instruction::Cast: // We can convert the expr if the cast destination type is losslessly // convertible to the requested type. if (!Ty->isLosslesslyConvertibleTo(I->getType())) return false; // We also do not allow conversion of a cast that casts from a ptr to array // of X to a *X. 
For example: cast [4 x %List *] * %val to %List * * // if (const PointerType *SPT = dyn_cast<PointerType>(I->getOperand(0)->getType())) if (const PointerType *DPT = dyn_cast<PointerType>(I->getType())) if (const ArrayType *AT = dyn_cast<ArrayType>(SPT->getElementType())) if (AT->getElementType() == DPT->getElementType()) return false; break; case Instruction::Add: case Instruction::Sub: if (!Ty->isInteger() && !Ty->isFloatingPoint()) return false; if (!ExpressionConvertibleToType(I->getOperand(0), Ty, CTMap, TD) || !ExpressionConvertibleToType(I->getOperand(1), Ty, CTMap, TD)) return false; break; case Instruction::Shr: if (!Ty->isInteger()) return false; if (Ty->isSigned() != V->getType()->isSigned()) return false; // FALL THROUGH case Instruction::Shl: if (!Ty->isInteger()) return false; if (!ExpressionConvertibleToType(I->getOperand(0), Ty, CTMap, TD)) return false; break; case Instruction::Load: { LoadInst *LI = cast<LoadInst>(I); if (!ExpressionConvertibleToType(LI->getPointerOperand(), PointerType::get(Ty), CTMap, TD)) return false; break; } case Instruction::PHI: { PHINode *PN = cast<PHINode>(I); // Be conservative if we find a giant PHI node. if (PN->getNumIncomingValues() > 32) return false; for (unsigned i = 0; i < PN->getNumIncomingValues(); ++i) if (!ExpressionConvertibleToType(PN->getIncomingValue(i), Ty, CTMap, TD)) return false; break; } case Instruction::Malloc: if (!MallocConvertibleToType(cast<MallocInst>(I), Ty, CTMap, TD)) return false; break; case Instruction::GetElementPtr: { // GetElementPtr's are directly convertible to a pointer type if they have // a number of zeros at the end. Because removing these values does not // change the logical offset of the GEP, it is okay and fair to remove them. 
// This can change this: // %t1 = getelementptr %Hosp * %hosp, ubyte 4, ubyte 0 ; <%List **> // %t2 = cast %List * * %t1 to %List * // into // %t2 = getelementptr %Hosp * %hosp, ubyte 4 ; <%List *> // GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); const PointerType *PTy = dyn_cast<PointerType>(Ty); if (!PTy) return false; // GEP must always return a pointer... const Type *PVTy = PTy->getElementType(); // Check to see if there are zero elements that we can remove from the // index array. If there are, check to see if removing them causes us to // get to the right type... // std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end()); const Type *BaseType = GEP->getPointerOperand()->getType(); const Type *ElTy = 0; while (!Indices.empty() && Indices.back() == Constant::getNullValue(Indices.back()->getType())){ Indices.pop_back(); ElTy = GetElementPtrInst::getIndexedType(BaseType, Indices, true); if (ElTy == PVTy) break; // Found a match!! ElTy = 0; } if (ElTy) break; // Found a number of zeros we can strip off! // Otherwise, we can convert a GEP from one form to the other iff the // current gep is of the form 'getelementptr sbyte*, long N // and we could convert this to an appropriate GEP for the new type. // if (GEP->getNumOperands() == 2 && GEP->getType() == PointerType::get(Type::SByteTy)) { // Do not Check to see if our incoming pointer can be converted // to be a ptr to an array of the right type... because in more cases than // not, it is simply not analyzable because of pointer/array // discrepancies. To fix this, we will insert a cast before the GEP. // // Check to see if 'N' is an expression that can be converted to // the appropriate size... if so, allow it. // std::vector<Value*> Indices; const Type *ElTy = ConvertibleToGEP(PTy, I->getOperand(1), Indices, TD); if (ElTy == PVTy) { if (!ExpressionConvertibleToType(I->getOperand(0), PointerType::get(ElTy), CTMap, TD)) return false; // Can't continue, ExConToTy might have polluted set! 
break; } } // Otherwise, it could be that we have something like this: // getelementptr [[sbyte] *] * %reg115, long %reg138 ; [sbyte]** // and want to convert it into something like this: // getelemenptr [[int] *] * %reg115, long %reg138 ; [int]** // if (GEP->getNumOperands() == 2 && PTy->getElementType()->isSized() && TD.getTypeSize(PTy->getElementType()) == TD.getTypeSize(GEP->getType()->getElementType())) { const PointerType *NewSrcTy = PointerType::get(PVTy); if (!ExpressionConvertibleToType(I->getOperand(0), NewSrcTy, CTMap, TD)) return false; break; } return false; // No match, maybe next time. } case Instruction::Call: { if (isa<Function>(I->getOperand(0))) return false; // Don't even try to change direct calls. // If this is a function pointer, we can convert the return type if we can // convert the source function pointer. // const PointerType *PT = cast<PointerType>(I->getOperand(0)->getType()); const FunctionType *FT = cast<FunctionType>(PT->getElementType()); std::vector<const Type *> ArgTys(FT->param_begin(), FT->param_end()); const FunctionType *NewTy = FunctionType::get(Ty, ArgTys, FT->isVarArg()); if (!ExpressionConvertibleToType(I->getOperand(0), PointerType::get(NewTy), CTMap, TD)) return false; break; } default: return false; } // Expressions are only convertible if all of the users of the expression can // have this value converted. This makes use of the map to avoid infinite // recursion. // for (Value::use_iterator It = I->use_begin(), E = I->use_end(); It != E; ++It) if (!OperandConvertibleToType(*It, I, Ty, CTMap, TD)) return false; return true; }