void TauFunctionTrace::instrument(){
    //InstrumentationTool::instrument();

    LineInfoFinder* lineInfoFinder = NULL;
    if (hasLineInformation()){
        lineInfoFinder = getLineInfoFinder();
    }

    InstrumentationPoint* p;

    uint64_t nameAddr = reserveDataOffset(sizeof(uint64_t));
    uint64_t fileAddr = reserveDataOffset(sizeof(uint64_t));
    uint64_t lineAddr = reserveDataOffset(sizeof(uint32_t));
    uint64_t siteIndexAddr = reserveDataOffset(sizeof(uint32_t));

    uint32_t site = functionEntry->addConstantArgument();
    ASSERT(site == functionExit->addConstantArgument());

    std::pebil_map_type<uint64_t, ControlInfo> functions;
    std::vector<uint64_t> orderedfuncs;

    uint32_t sequenceId = 0;
    if (doIntro){
        // go over all functions and intrument entries/exits
        for (uint32_t i = 0; i < getNumberOfExposedFunctions(); i++){
            Function* function = getExposedFunction(i);
            uint64_t addr = function->getBaseAddress();
            if (instrumentList && !instrumentList->functionMatches(function->getName())){
                continue;
            }
            if (!strcmp(function->getName(), "_fini")){
                continue;
            }

            BasicBlock* entryBlock = function->getFlowGraph()->getEntryBlock();
            Vector<BasicBlock*>* exitPoints = function->getFlowGraph()->getExitBlocks();

            std::string c;
            c.append(function->getName());
            if (c == "_start"){
                exitPoints->append(getProgramExitBlock());
                PRINT_INFOR("Special case: inserting exit for _start inside _fini since control generally doesn't reach its exit");
            }

            ASSERT(functions.count(addr) == 0 && "Multiple functions have the same base address?");

            PRINT_INFOR("[FUNCTION index=%d] internal instrumentation: %s", sequenceId, function->getName());

            ControlInfo f = ControlInfo();
            f.name = c;
            f.file = "";
            f.line = 0;
            f.index = sequenceId++;
            f.baseaddr = addr;
            f.type = ControlType_Function;

            LineInfo* li = NULL;
            if (lineInfoFinder){
                li = lineInfoFinder->lookupLineInfo(addr);
            }

            if (li){
                f.file.append(li->getFileName());
                f.line = li->GET(lr_line);
            }

            functions[addr] = f;
            orderedfuncs.push_back(addr);

            InstrumentationPoint* prior = addInstrumentationPoint(entryBlock->getLeader(), functionEntry, InstrumentationMode_tramp, InstLocation_prior);
            prior->setPriority(InstPriority_custom3);
            assignStoragePrior(prior, f.index, site);

            for (uint32_t j = 0; j < exitPoints->size(); j++){
                InstrumentationPoint* after = addInstrumentationPoint((*exitPoints)[j]->getExitInstruction(), functionExit, InstrumentationMode_tramp, InstLocation_prior);
                after->setPriority(InstPriority_custom5);

                if (c == "_start" && j == exitPoints->size() - 1){
                    after->setPriority(InstPriority_custom6);
                }
                assignStoragePrior(after, f.index, site);
            }

            delete exitPoints;
        }

    } else {
        // go over all instructions. when we find a call, instrument it
        for (uint32_t i = 0; i < getNumberOfExposedInstructions(); i++){
            X86Instruction* x = getExposedInstruction(i);
            ASSERT(x->getContainer()->isFunction());
            Function* function = (Function*)x->getContainer();

            if (x->isFunctionCall()){
                uint64_t addr = x->getTargetAddress();
                Symbol* functionSymbol = getElfFile()->lookupFunctionSymbol(addr);
            
                if (functionSymbol){

                    if (instrumentList && !instrumentList->functionMatches(functionSymbol->getSymbolName())){
                        continue;
                    }
                    ASSERT(x->getSizeInBytes() == Size__uncond_jump);
                
                    std::string c;
                    c.append(functionSymbol->getSymbolName());
                    if (functions.count(addr) == 0){
                        PRINT_INFOR("[FUNCTION index=%d] call site instrumentation: %#lx(%s) -> %s", sequenceId, addr, function->getName(), functionSymbol->getSymbolName());

                        ControlInfo f = ControlInfo();
                        f.name = c;
                        f.file = "";
                        f.line = 0;
                        f.index = sequenceId++;
                        f.baseaddr = addr;
                        f.type = ControlType_Function;

                        LineInfo* li = NULL;
                        if (lineInfoFinder){
                            li = lineInfoFinder->lookupLineInfo(addr);
                        }

                        if (li){
                            f.file.append(li->getFileName());
                            f.line = li->GET(lr_line);
                        }

                        functions[addr] = f;
                        orderedfuncs.push_back(addr);
                    }
                    uint32_t idx = functions[addr].index;

                    Base* exitpoint = (Base*)x;
                    if (c == "__libc_start_main"){
                        PRINT_INFOR("Special case: inserting exit for __libc_start_main inside _fini since this call generally doesn't return");
                        exitpoint = (Base*)getProgramExitBlock();
                    }

                    InstrumentationPoint* prior = addInstrumentationPoint(x, functionEntry, InstrumentationMode_tramp, InstLocation_prior);
                    InstrumentationPoint* after = addInstrumentationPoint(exitpoint, functionExit, InstrumentationMode_tramp, InstLocation_after);

                    assignStoragePrior(prior, idx, site);
                    assignStoragePrior(after, idx, site);
                }
            }
        }
    }


    // set up argument passing for function registration
    functionRegister->addArgument(nameAddr);
    functionRegister->addArgument(fileAddr);
    functionRegister->addArgument(lineAddr);
    uint32_t siteReg = functionRegister->addConstantArgument();


    // go over every function that was found, insert a registration call at program start
    for (std::vector<uint64_t>::iterator it = orderedfuncs.begin(); it != orderedfuncs.end(); it++){
        uint64_t addr = *it;
        ControlInfo f = functions[addr];
       
        ASSERT(f.baseaddr == addr);

        InstrumentationPoint* p = addInstrumentationPoint(getProgramEntryBlock(), functionRegister, InstrumentationMode_tramp);
        p->setPriority(InstPriority_custom1);

        const char* cstring = f.name.c_str();
        uint64_t storage = reserveDataOffset(strlen(cstring) + 1);
        initializeReservedData(getInstDataAddress() + storage, strlen(cstring), (void*)cstring);
        assignStoragePrior(p, getInstDataAddress() + storage, getInstDataAddress() + nameAddr, X86_REG_CX, getInstDataAddress() + getRegStorageOffset());

        const char* cstring2 = f.file.c_str();
        if (f.file == ""){
            assignStoragePrior(p, NULL, getInstDataAddress() + fileAddr, X86_REG_CX, getInstDataAddress() + getRegStorageOffset());            
        } else {
            storage = reserveDataOffset(strlen(cstring2) + 1);
            initializeReservedData(getInstDataAddress() + storage, strlen(cstring2), (void*)cstring2);
            assignStoragePrior(p, getInstDataAddress() + storage, getInstDataAddress() + fileAddr, X86_REG_CX, getInstDataAddress() + getRegStorageOffset());
        }

        assignStoragePrior(p, f.line, getInstDataAddress() + lineAddr, X86_REG_CX, getInstDataAddress() + getRegStorageOffset());
        assignStoragePrior(p, f.index, siteReg);
    }


    if (!instrumentList){
        return;
    }

    // instrument loops
    std::pebil_map_type<uint64_t, ControlInfo> loops;
    std::vector<uint64_t> orderedloops;

    loopRegister->addArgument(nameAddr);
    loopRegister->addArgument(fileAddr);
    loopRegister->addArgument(lineAddr);
    ASSERT(siteReg == loopRegister->addConstantArgument());

    for (uint32_t i = 0; i < getNumberOfExposedFunctions(); i++){
        Function* function = getExposedFunction(i);
        FlowGraph* flowgraph = function->getFlowGraph();

        if (!instrumentList->loopMatches(function->getName())){
            continue;
        }

        for (uint32_t j = 0; j < flowgraph->getNumberOfLoops(); j++){
            Loop* loop = flowgraph->getLoop(j);
            uint32_t depth = flowgraph->getLoopDepth(loop);
            BasicBlock* head = loop->getHead();
            uint64_t addr = head->getBaseAddress();

            // only want outer-most (depth == 1) loops
            if (depth != 1){
                continue;
            }

            BasicBlock** allLoopBlocks = new BasicBlock*[loop->getNumberOfBlocks()];
            loop->getAllBlocks(allLoopBlocks);

            // reject any loop that contains an indirect branch since it is difficult to guarantee that we will find all exits
            bool badLoop = false;
            for (uint32_t k = 0; k < loop->getNumberOfBlocks() && !badLoop; k++){
                BasicBlock* bb = allLoopBlocks[k];
                if (bb->getExitInstruction()->isIndirectBranch()){
                    badLoop = true;
                }
            }

            if (badLoop){
                PRINT_WARN(20, "Loop at %#lx in %s contains an indirect branch so we can't guarantee that all exits will be found. skipping!", addr, function->getName());
                delete[] allLoopBlocks;
                continue;
            }

            std::string c;
            c.append(function->getName());

            uint32_t entryc = 0;

            Vector<LoopPoint*>* points = NULL;

            // if addr already exists, it means that two loops share a head and we are going to merge them logically here
            if (loops.count(addr) == 0){

                ControlInfo f = ControlInfo();
                f.name = c;
                f.file = "";
                f.line = 0;
                f.index = sequenceId++;
                f.baseaddr = addr;
                f.type = ControlType_Loop;

                points = new Vector<LoopPoint*>();
                f.info = points;

                LineInfo* li = NULL;
                if (lineInfoFinder){
                    li = lineInfoFinder->lookupLineInfo(addr);
                }
                if (li){
                    f.file.append(li->getFileName());
                    f.line = li->GET(lr_line);
                }

                loops[addr] = f;
                orderedloops.push_back(addr);

                // find entries into this loop
                for (uint32_t k = 0; k < head->getNumberOfSources(); k++){
                    BasicBlock* source = head->getSourceBlock(k);

                    if (!loop->isBlockIn(source->getIndex())){
                        LoopPoint* lp = new LoopPoint();
                        points->append(lp);

                        lp->flowgraph = flowgraph;
                        lp->source = source;
                        lp->target = NULL;
                        lp->entry = true;
                        lp->interpose = false;

                        if (source->getBaseAddress() + source->getNumberOfBytes() != head->getBaseAddress()){
                            lp->interpose = true;
                            lp->target = head;
                        }
                        entryc++;
                    }
                }
            }

            ControlInfo f = loops[addr];
            points = f.info;

            // find exits from this loop
            uint32_t exitc = 0;
            for (uint32_t k = 0; k < loop->getNumberOfBlocks(); k++){
                BasicBlock* bb = allLoopBlocks[k];
                if (bb->endsWithReturn()){
                    LoopPoint* lp = new LoopPoint();
                    points->append(lp);

                    lp->flowgraph = flowgraph;
                    lp->source = bb;
                    lp->target = NULL;
                    lp->entry = false;
                    lp->interpose = false;
                    exitc++;
                }

                for (uint32_t m = 0; m < bb->getNumberOfTargets(); m++){
                    BasicBlock* target = bb->getTargetBlock(m);
                    if (!loop->isBlockIn(target->getIndex())){
                        LoopPoint* lp = new LoopPoint();
                        points->append(lp);

                        lp->flowgraph = flowgraph;
                        lp->source = bb;
                        lp->target = NULL;
                        lp->entry = false;
                        lp->interpose = false;

                        if (target->getBaseAddress() != bb->getBaseAddress() + bb->getNumberOfBytes()){
                            lp->interpose = true;
                            lp->target = target;
                        }
                        exitc++;
                    }
                }
            }

            PRINT_INFOR("[LOOP index=%d] loop instrumentation %#lx(%s) has %d entries and %d exits", sequenceId-1, addr, function->getName(), entryc, exitc);

            delete[] allLoopBlocks;
        }
    }

    // go over every loop that was found, insert a registration call at program start
    // [source_addr -> [target_addr -> interposed]]
    std::pebil_map_type<uint64_t, std::pebil_map_type<uint64_t, BasicBlock*> > idone;
    for (std::vector<uint64_t>::iterator it = orderedloops.begin(); it != orderedloops.end(); it++){
        uint64_t addr = *it;
        ControlInfo f = loops[addr];
       
        ASSERT(f.baseaddr == addr);

        InstrumentationPoint* p = addInstrumentationPoint(getProgramEntryBlock(), loopRegister, InstrumentationMode_tramp);
        p->setPriority(InstPriority_custom2);

        const char* cstring = f.name.c_str();
        uint64_t storage = reserveDataOffset(strlen(cstring) + 1);
        initializeReservedData(getInstDataAddress() + storage, strlen(cstring), (void*)cstring);
        assignStoragePrior(p, getInstDataAddress() + storage, getInstDataAddress() + nameAddr, X86_REG_CX, getInstDataAddress() + getRegStorageOffset());

        const char* cstring2 = f.file.c_str();
        if (f.file == ""){
            assignStoragePrior(p, NULL, getInstDataAddress() + fileAddr, X86_REG_CX, getInstDataAddress() + getRegStorageOffset());            
        } else {
            storage = reserveDataOffset(strlen(cstring2) + 1);
            initializeReservedData(getInstDataAddress() + storage, strlen(cstring2), (void*)cstring2);
            assignStoragePrior(p, getInstDataAddress() + storage, getInstDataAddress() + fileAddr, X86_REG_CX, getInstDataAddress() + getRegStorageOffset());
        }

        assignStoragePrior(p, f.line, getInstDataAddress() + lineAddr, X86_REG_CX, getInstDataAddress() + getRegStorageOffset());
        assignStoragePrior(p, f.index, siteReg);

        // now add instrumentation for each loop entry/exit
        Vector<LoopPoint*>* v = (Vector<LoopPoint*>*)f.info;
        for (uint32_t i = 0; i < v->size(); i++){
            LoopPoint* lp = (*v)[i];
            ASSERT(lp->flowgraph && lp->source);

            BasicBlock* bb = lp->source;
            if (lp->interpose){
                ASSERT(lp->target);
                if (idone.count(lp->source->getBaseAddress()) == 0){
                    idone[lp->source->getBaseAddress()] = std::pebil_map_type<uint64_t, BasicBlock*>();
                }
                if (idone[lp->source->getBaseAddress()].count(lp->target->getBaseAddress()) == 0){
                    idone[lp->source->getBaseAddress()][lp->target->getBaseAddress()] = initInterposeBlock(lp->flowgraph, lp->source->getIndex(), lp->target->getIndex());
                }

                bb = idone[lp->source->getBaseAddress()][lp->target->getBaseAddress()];

            } else {
                ASSERT(lp->target == NULL);
            }

            Base* pt = (Base*)bb;
            InstLocations loc = InstLocation_prior;

            // if exit block falls through, we must place the instrumentation point at the very end of the block
            if (!lp->entry && !lp->interpose){
                pt = (Base*)bb->getExitInstruction();
                if (!bb->getExitInstruction()->isReturn()){
                    loc = InstLocation_after;
                }
            }

            InstrumentationFunction* inf = functionExit;
            if (lp->entry){
                inf = functionEntry;
            }

            InstrumentationPoint* p = addInstrumentationPoint(pt, inf, InstrumentationMode_tramp, loc);
            p->setPriority(InstPriority_custom4);
            assignStoragePrior(p, f.index, site);

            delete lp;
        }
        delete v;
    }

}
void BasicBlockCounter::instrument() 
{
    InstrumentationTool::instrument();
    ASSERT(currentPhase == ElfInstPhase_user_reserve && "Instrumentation phase order must be observed"); 
    uint32_t temp32;

    LineInfoFinder* lineInfoFinder = NULL;
    if (hasLineInformation()){
        lineInfoFinder = getLineInfoFinder();
    }

    uint64_t lineArray = reserveDataOffset(getNumberOfExposedBasicBlocks() * sizeof(uint32_t));
    uint64_t fileNameArray = reserveDataOffset(getNumberOfExposedBasicBlocks() * sizeof(char*));
    uint64_t funcNameArray = reserveDataOffset(getNumberOfExposedBasicBlocks() * sizeof(char*));
    uint64_t hashCodeArray = reserveDataOffset(getNumberOfExposedBasicBlocks() * sizeof(uint64_t));
    uint64_t appName = reserveDataOffset((strlen(getApplicationName()) + 1) * sizeof(char));
    initializeReservedData(getInstDataAddress() + appName, strlen(getApplicationName()) + 1, getApplicationName());
    uint64_t instExt = reserveDataOffset((strlen(getInstSuffix()) + 1) * sizeof(char));
    initializeReservedData(getInstDataAddress() + instExt, strlen(getInstSuffix()) + 1, getInstSuffix());

    // the number blocks in the code
    uint64_t counterArrayEntries = reserveDataOffset(sizeof(uint32_t));

    // an array of counters. note that everything is passed by reference
    uint64_t counterArray = reserveDataOffset(getNumberOfExposedBasicBlocks() * sizeof(uint32_t));

    exitFunc->addArgument(counterArray);
    exitFunc->addArgument(appName);
    exitFunc->addArgument(instExt);

    InstrumentationPoint* p = addInstrumentationPoint(getProgramExitBlock(), exitFunc, InstrumentationMode_tramp);
    if (!p->getInstBaseAddress()){
        PRINT_ERROR("Cannot find an instrumentation point at the exit function");
    }

    temp32 = 0;
    for (uint32_t i = 0; i < getNumberOfExposedBasicBlocks(); i++){
        initializeReservedData(getInstDataAddress() + counterArray + i*sizeof(uint32_t), sizeof(uint32_t), &temp32);
    }

    // the number of inst points
    entryFunc->addArgument(counterArrayEntries);
    temp32 = getNumberOfExposedBasicBlocks();
    initializeReservedData(getInstDataAddress() + counterArrayEntries, sizeof(uint32_t), &temp32);

    // an array for line numbers
    entryFunc->addArgument(lineArray);
    // an array for file name pointers
    entryFunc->addArgument(fileNameArray);
    // an array for function name pointers
    entryFunc->addArgument(funcNameArray);
    // an array for hashcodes
    entryFunc->addArgument(hashCodeArray);

    p = addInstrumentationPoint(getProgramEntryBlock(), entryFunc, InstrumentationMode_tramp, FlagsProtectionMethod_full, InstLocation_prior);
    p->setPriority(InstPriority_userinit);
    if (!p->getInstBaseAddress()){
        PRINT_ERROR("Cannot find an instrumentation point at the entry block");
    }

    uint64_t noDataAddr = getInstDataAddress() + reserveDataOffset(strlen(NOSTRING) + 1);
    char* nostring = new char[strlen(NOSTRING) + 1];
    sprintf(nostring, "%s\0", NOSTRING);
    initializeReservedData(noDataAddr, strlen(NOSTRING) + 1, nostring);

    PRINT_DEBUG_MEMTRACK("There are %d instrumentation points", getNumberOfExposedBasicBlocks());
    Vector<BasicBlock*>* allBlocks = new Vector<BasicBlock*>();
    Vector<LineInfo*>* allLineInfos = new Vector<LineInfo*>();

    uint32_t noProtPoints = 0;
    uint32_t complexSelection = 0;
    for (uint32_t i = 0; i < getNumberOfExposedBasicBlocks(); i++){

        BasicBlock* bb = getExposedBasicBlock(i);
        LineInfo* li = NULL;
        if (lineInfoFinder){
            li = lineInfoFinder->lookupLineInfo(bb);
        }
        Function* f = bb->getFunction();

        (*allBlocks).append(bb);
        (*allLineInfos).append(li);

        if (i % 1000 == 0){
            PRINT_DEBUG_MEMTRACK("inst point %d", i);
            PRINT_MEMTRACK_STATS(__LINE__, __FILE__, __FUNCTION__);            
        }

        if (li){
            uint32_t line = li->GET(lr_line);
            initializeReservedData(getInstDataAddress() + lineArray + sizeof(uint32_t)*i, sizeof(uint32_t), &line);

            uint64_t filename = reserveDataOffset(strlen(li->getFileName()) + 1);
            uint64_t filenameAddr = getInstDataAddress() + filename;
            initializeReservedData(getInstDataAddress() + fileNameArray + i*sizeof(char*), sizeof(char*), &filenameAddr);
            initializeReservedData(getInstDataAddress() + filename, strlen(li->getFileName()) + 1, (void*)li->getFileName());

        } else {
            temp32 = 0;
            initializeReservedData(getInstDataAddress() + lineArray + sizeof(uint32_t)*i, sizeof(uint32_t), &temp32);
            initializeReservedData(getInstDataAddress() + fileNameArray + i*sizeof(char*), sizeof(char*), &noDataAddr);
        }
        uint64_t funcname = reserveDataOffset(strlen(f->getName()) + 1);
        uint64_t funcnameAddr = getInstDataAddress() + funcname;
        initializeReservedData(getInstDataAddress() + funcNameArray + i*sizeof(char*), sizeof(char*), &funcnameAddr);
        initializeReservedData(getInstDataAddress() + funcname, strlen(f->getName()) + 1, (void*)f->getName());

        uint64_t hashValue = bb->getHashCode().getValue();
        initializeReservedData(getInstDataAddress() + hashCodeArray + i*sizeof(uint64_t), sizeof(uint64_t), &hashValue);
        
        InstrumentationSnippet* snip = new InstrumentationSnippet();
        uint64_t counterOffset = counterArray + (i * sizeof(uint32_t));

        // snippet contents, in this case just increment a counter
        if (is64Bit()){
            snip->addSnippetInstruction(X86InstructionFactory64::emitAddImmByteToMem(1, getInstDataAddress() + counterOffset));
        } else {
            snip->addSnippetInstruction(X86InstructionFactory32::emitAddImmByteToMem(1, getInstDataAddress() + counterOffset));
        }

        // do not generate control instructions to get back to the application, this is done for
        // the snippet automatically during code generation
            
        // register the snippet we just created
        addInstrumentationSnippet(snip);            
            
        // register an instrumentation point at the function that uses this snippet
        FlagsProtectionMethods prot = FlagsProtectionMethod_light;
#ifndef NO_REG_ANALYSIS
        if (bb->getLeader()->allFlagsDeadIn()){
            prot = FlagsProtectionMethod_none;
            noProtPoints++;
        }
        for (uint32_t j = 0; j < bb->getNumberOfInstructions(); j++){
            if (bb->getInstruction(j)->allFlagsDeadIn() || bb->getInstruction(j)->allFlagsDeadOut()){
                complexSelection++;
                break;
            }
        }
#endif
        InstrumentationPoint* p = addInstrumentationPoint(bb, snip, InstrumentationMode_inline, prot);
    }
    PRINT_MEMTRACK_STATS(__LINE__, __FILE__, __FUNCTION__);
#ifdef NO_REG_ANALYSIS
    PRINT_WARN(10, "Warning: register analysis disabled");
#endif
    PRINT_INFOR("Excluding flags protection for %d/%d instrumentation points", noProtPoints, getNumberOfExposedBasicBlocks());
    //PRINT_INFOR("complex inst point selection: %d/%d instrumentation points", complexSelection, getNumberOfExposedBasicBlocks());

    printStaticFile(allBlocks, allLineInfos);

    delete[] nostring;
    delete allBlocks;
    delete allLineInfos;

    ASSERT(currentPhase == ElfInstPhase_user_reserve && "Instrumentation phase order must be observed"); 
}