예제 #1
0
// Rebuilds the coverage list for the currently active function.
// Only the first part of the function is visible in this listing: it clears
// the view, validates the selection and fills the cost list _hc with the
// functions that Coverage::coverage() reports for the active function.
void CoverageView::refresh()
{
    clear();

    // Nothing to show without loaded data and a selected item.
    if (!_data || !_activeItem) return;

    // Only functions and function cycles are handled; any other item type
    // leaves f at 0 and the view stays empty.
    ProfileContext::Type t = _activeItem->type();
    TraceFunction* f = 0;
    if (t == ProfileContext::Function) f = (TraceFunction*) _activeItem;
    if (t == ProfileContext::FunctionCycle) f = (TraceFunction*) _activeItem;
    if (!f) return;



    // Reset the cost list, sized by the configured maximum list count.
    _hc.clear(GlobalConfig::maxListCount());
    // Inclusive cost of the active function for the selected event type;
    // used below to scale each coverage value into a SubCost.
    SubCost realSum = f->inclusive()->subCost(_eventType);

    // Collect the functions covering (callers) or covered by (callees) f.
    TraceFunctionList l;
    if (_showCallers)
      l = Coverage::coverage(f, Coverage::Caller, _eventType);
    else
      l = Coverage::coverage(f, Coverage::Called, _eventType);

    // Add every function with a positive inclusive coverage value to the
    // cost list, with cost realSum * coverage.
    foreach(TraceFunction* f2, l) {
      Coverage* c = (Coverage*) f2->association(Coverage::Rtti);
      if (c && (c->inclusive()>0.0))
        _hc.addCost(f2, SubCost(realSum * c->inclusive()));
    }
예제 #2
0
// Adding an alignment with CIGAR 2M3N2M at position 3 must raise coverage
// only under the two 'M' segments; the 'N' segment in between stays at zero.
TEST(CoverageTest, Coverage_add_alignment)
{
    Alignment alignment;
    alignment.RefName = "foo";
    alignment.position(3);
    alignment.CigarData.clear();

    // Leading match segment.
    CigarOp leadingMatch;
    leadingMatch.Type = 'M';
    leadingMatch.Length = 2;
    alignment.CigarData.push_back(leadingMatch);

    // Skipped region between the two matches.
    CigarOp skipped;
    skipped.Type = 'N';
    skipped.Length = 3;
    alignment.CigarData.push_back(skipped);

    // Trailing match segment.
    CigarOp trailingMatch;
    trailingMatch.Type = 'M';
    trailingMatch.Length = 2;
    alignment.CigarData.push_back(trailingMatch);

    Coverage coverage;
    coverage.add(alignment);

    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 0, 1, 1, 0, 0, 0, 1, 1));
}
예제 #3
0
// Loading the tab/newline serialised form must restore one coverage vector
// per reference, with the values in file order.
TEST(CoverageTest, load)
{
    const std::string serialized =
        "bar\t6\n1\n1\n1\n0\n0\n0\nfoo\t5\n0\n0\n1\n1\n0\n";
    std::stringstream input(serialized);

    Coverage coverage;
    coverage.load(input);

    EXPECT_THAT(coverage.coverages.find("bar")->second,
                ElementsAre(1, 1, 1, 0, 0, 0));
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 0, 1, 1, 0));
}
// Stores the binary data of the coverage's attribute table through a
// temporary table connector.
// Returns false when the coverage has no valid attribute table; otherwise
// returns whatever the table connector's storeBinaryData() reports.
bool CoverageConnector::storeBinaryData(IlwisObject *obj, IlwisTypes tp)
{
    Coverage *cov = static_cast<Coverage *>(obj);
    ITable table = cov->attributeTable();
    if ( !table.isValid())
        return false;

    // The scoped pointer releases the connector when this function returns.
    QScopedPointer<TableConnector> tableConnector(createTableConnector(table, cov, tp));
    return tableConnector->storeBinaryData(table.ptr());
}
예제 #5
0
void CoverageView::refresh()
{
    clear();
    setColumnWidth(0, 50);
    if (!_showCallers)
	setColumnWidth(1, 50);

    if (!_data || !_activeItem) return;

    TraceItem::CostType t = _activeItem->type();
    TraceFunction* f = 0;
    if (t == TraceItem::Function) f = (TraceFunction*) _activeItem;
    if (t == TraceItem::FunctionCycle) f = (TraceFunction*) _activeItem;
    if (!f) return;

    TraceFunction* ff;
    TraceFunctionList l;

    _hc.clear(Configuration::maxListCount());
    SubCost realSum = f->inclusive()->subCost(_costType);

    if (_showCallers)
      l = Coverage::coverage(f, Coverage::Caller, _costType);
    else
      l = Coverage::coverage(f, Coverage::Called, _costType);

    for (ff=l.first();ff;ff=l.next()) {
      Coverage* c = (Coverage*) ff->assoziation(Coverage::Rtti);
      if (c && (c->inclusive()>0.0))
	_hc.addCost(ff, SubCost(realSum * c->inclusive()));
    }

    for(int i=0;i<_hc.realCount();i++) {
      ff = (TraceFunction*) _hc[i];
      Coverage* c = (Coverage*) ff->assoziation(Coverage::Rtti);
      if (_showCallers)
	new CallerCoverageItem(this, c, f, _costType, _groupType);
      else
	new CalleeCoverageItem(this, c, f, _costType, _groupType);
    }
    if (_hc.hasMore()) {
      // a placeholder for all the functions skipped ...
      ff = (TraceFunction*) _hc[_hc.maxSize()-1];
      Coverage* c = (Coverage*) ff->assoziation(Coverage::Rtti);
      if (_showCallers)
	new CallerCoverageItem(this, _hc.count() - _hc.maxSize(),
			       c, f, _costType, _groupType);
      else
	new CalleeCoverageItem(this, _hc.count() - _hc.maxSize(),
			       c, f, _costType, _groupType);
    }
}
예제 #6
0
// setMinReferenceLength() creates a zero-filled vector of the requested
// length and never shrinks an existing one.
TEST(CoverageTest, Coverage_setMinReferenceLength)
{
    Coverage coverage;
    EXPECT_EQ(0, coverage.coverages.size());

    // First call creates the reference with ten zeroed positions.
    coverage.setMinReferenceLength("foo", 10);
    EXPECT_EQ(1, coverage.coverages.size());
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 0, 0, 0, 0, 0, 0, 0, 0, 0));

    // A smaller minimum must leave the existing ten positions untouched.
    coverage.setMinReferenceLength("foo", 5);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
}
// Loads the coverage-level metadata from the object definition file (_odf):
// coordinate system, optional attribute table and coordinate bounds.
//
// Returns false when no coordinate system (not even the 'unknown' fallback)
// can be prepared, or when a required attribute table cannot be prepared.
// Malformed coordinate bounds only log a warning; the function still
// returns true in that case.
bool CoverageConnector::loadMetaData(Ilwis::IlwisObject *data)
{
    Ilwis3Connector::loadMetaData(data);

    Coverage *coverage = static_cast<Coverage *>(data);
    QString csyName = _odf->value("BaseMap","CoordSystem");
    // The ILWIS3 lat/lon WGS84 system file is mapped onto its EPSG code.
    if ( csyName.toLower() == "latlonwgs84.csy")
        csyName = "code=epsg:4326";
    ICoordinateSystem csy;
    if ( !csy.prepare(csyName)) {
        kernel()->issues()->log(csyName,TR("Coordinate system couldnt be initialized, defaulting to 'unknown'"),IssueObject::itWarning);
        QString resource = QString("ilwis://file/unknown.csy");
        if (!csy.prepare(resource)) {
            kernel()->issues()->log(TR("Fallback to 'unknown failed', corrupt system files defintion"));
            return false;
        }
    }
    coverage->setCoordinateSystem(csy);


    QString attfile = _odf->value("BaseMap", "AttributeTable");
    QString basemaptype = _odf->value("BaseMap", "Type");
    // feature coverages always have an attribute table; rasters might have
    if ( basemaptype != "Map" || attfile != sUNDEF) {
        ITable attTable = prepareAttributeTable(attfile, basemaptype);
        if (!attTable.isValid())
            return false;

        coverage->attributeTable(attTable);
    }

    QString cbounds = _odf->value("BaseMap","CoordBounds");
    // The four values may be separated by more than one blank (for example
    // when they were written with a padded field width). simplified() trims
    // the string and collapses whitespace runs so the split yields exactly
    // one token per value instead of spurious empty tokens.
    QStringList parts = cbounds.simplified().split(" ");
    if ( parts.size() == 4) {
        double minx = parts[0].toDouble();
        double miny = parts[1].toDouble();
        double maxx = parts[2].toDouble();
        double maxy = parts[3].toDouble();
        Box2D<double> env(Coordinate(minx, miny), Coordinate(maxx, maxy));
        coverage->envelope(env);
    } else {
        kernel()->issues()->log(TR(ERR_INVALID_PROPERTY_FOR_2).arg("Coordinate boundaries", data->name()), IssueObject::itWarning);
    }


    return true;
}
예제 #8
0
// Both serialisation entry points must produce the same text, with the
// references emitted in sorted order of reference name.
TEST(CoverageTest, toString)
{
    Coverage coverage;
    coverage.setMinReferenceLength("foo", 5);
    coverage.setMinReferenceLength("bar", 6);
    coverage.add("foo", 3, 2);
    coverage.add("bar", 1, 3);

    // "bar" sorts before "foo" although it was registered second.
    const std::string expected =
        "bar\t6\n1\n1\n1\n0\n0\n0\nfoo\t5\n0\n0\n1\n1\n0\n";

    // Stream-based output.
    std::stringstream streamed;
    coverage.toOutputStream(streamed);
    EXPECT_EQ(expected, streamed.str());

    // String-based output.
    std::string asString;
    coverage.toString(asString);
    EXPECT_EQ(expected, asString);
}
예제 #9
0
// add(name, position, length) increments the covered positions, growing the
// vector as needed; setMinReferenceLength() then pads it with zeros.
TEST(CoverageTest, Coverage_add)
{
    Coverage coverage;

    // First interval: positions 2..4.
    coverage.add("foo", 2, 3);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 1, 1, 1));

    // Overlapping interval: positions 2..3 are counted twice.
    coverage.add("foo", 2, 2);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1));

    // Interval past the current end extends the vector.
    coverage.add("foo", 6, 2);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1, 0, 1, 1));

    // Raising the minimum length keeps the counts and appends zeros.
    coverage.setMinReferenceLength("foo", 10);
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1, 0, 1, 1, 0, 0, 0));
}
예제 #10
0
파일: ParseSNP.cpp 프로젝트: Cibiv/BODscore
void ParseSNP::parseVCF() {
    size_t COMMIT_EACH = 200;
    bool IMMEDIATE = false;

    ifstream vcfFile;
    vcfFile.open(snpfile.c_str(), ifstream::in);
    if (!vcfFile.good()) {
        clog << "SNP Parser: could not open file: " << snpfile.c_str()
                << endl;
        exit(0);
    }
        
        clog << "reading the VCF file" << endl;
         
//    vcfFile.getline(buffer, buffer_size);

    vector<int> tmp;
    string broken_chromosome = "";
//    snps.resize(chromosome_vector.size(), tmp);
   
    Parser * mapped_file = 0;
    FastaParser * fasta = new FastaParser(reffile);
    string ref;
    
    FILE * plotFile = NULL;
    // sqlite3pp::transaction * xct;
    bool transaction_flag = false;
    if (db_flag){
        clog << "writing to database: " <<  plot_file.c_str() << endl;
        db = new SqliteDb( plot_file.c_str() , verbose );
        db->init_sql_table( sample_label );
        db->init_register_table();
        db->place_register_record_begin(  snpfile , read_filename);
        db->init_contig_table();
        if (!db) {  throw std::range_error("null pointer to the database");   
        } else { clog<< "table has been successfully initialized\t" << db << endl;} ;

    } else {
        remove(plot_file.c_str());
        plotFile = fopen(plot_file.c_str(), "a");
        if (plotFile == NULL) {
            cerr << "Error in printing: The file or path that you set "
//                << output.c_str()
                << " is not valid. It can be that there is no disc space available."
                << endl;
                ios_base::failure("cannot open output file for writing!");
                exit(0);
        }
   };

    if (read_filename.find(".bam") != string::npos) {
        mapped_file = new BamParser(read_filename);
    }
    clog << "======================================" << endl;
    clog << mapped_file->get_header() << endl;
    clog << "======================================" << endl;

//    clog << "num of chr " << genome.size() << endl;
 //   clog << "first chr size " << genome[0].size() << endl;
    clog << "`range` has been set to: " << range << endl;
    size_t chr_bam = 0;
    size_t temp_chr_ref = NA;
    size_t chr_ref = NA;
    int pos = 0;
    //    int n_snp = 0;  // <- global
    // ref = fasta->getChr(chr_ref);

    Coverage * cov;
    cov = new Coverage(range);

    while (!vcfFile.eof()) {

//        vcfFile.getline(buffer, buffer_size);
        safeGetline(vcfFile ,  buffer, buffer_size);
        if (buffer[0] == '#'){
            if (verbose){ clog << "skipping comment" << endl ;
                std::string s( buffer );
                if (s.find_last_of("\r") > 0){
                    clog << "this file contains Windows / MacOS specific end-of-line character \'\\r\'\n" <<  \
                        "@ position:\t" <<  s.find_last_of("\r") << "\n" << \
                        "consider running `dos2unix` or similar!" << endl;
                }
            }
            continue;
        }
        int field_count = 0;
        string current_chr;
        // read vcf file:
        // `i` -- runs for symbols
        // `field_count` -- runs for fields
        for (size_t i = 0; field_count< 2 && i < buffer_size && buffer[i] != '\0' && buffer[i] != '\n'; i++) {
            if (field_count == 0 && buffer[i] != '\t') {
                current_chr += buffer[i];
            }

            if (field_count == 1 && buffer[i - 1] == '\t') { //start: pos column
                pos = atoi(&buffer[i]) - 1;
                break;
            } // end: pos column
            if (buffer[i] == '\t') {
                field_count++;
            }
        }
        // process chromosome:
        if (verbose) { clog << "reading\t" << current_chr.c_str() << "\t" << pos << endl;}; 
        if ( chromosome_vector.count(current_chr.c_str()) > 0){ //found

            temp_chr_ref = chromosome_vector[current_chr.c_str()];
            if  (temp_chr_ref != chr_ref) { // new chromosome
                chr_ref = chromosome_vector[current_chr.c_str()];
                clog << endl; // "; getting next bam chromosome..." << endl;
                chr_bam = mapped_file->GetReferenceID( current_chr);
                if (chr_ref != chr_bam ){
                    cerr << "contig: " << current_chr << "; [fasta#:] " << chr_ref << "; [bam#:] " << chr_bam << ";  mismatch!" << endl;
                }
                if (verbose) {
                    cout << endl;
                    cout << "contig [fasta#:]\t " << chr_ref + 1 \
                    << "\t[bam#:]\t" << chr_bam + 1 \
                    << "\t[vcf:]\t" << current_chr.c_str() << "\t[fasta:]\t " << fasta->contig_name[chr_ref] << endl;
                } else {
                    clog << "contig # " << chr_ref+1 << endl;
                }
                ref = fasta->getChr(chr_ref);

                if (db_flag) {
                    if (transaction_flag){
                        clog << "commiting" << endl;
                        if (!db) {  throw std::range_error("null pointer to the database");    };
                        db->intermediate_commit();
                    } else {
                        if (!db) {  throw std::range_error("null pointer to the database");    };
                        db->new_transaction();
                        transaction_flag = true;
                    }
                    db->place_contig_table_record( chr_ref, current_chr);
                }
                n_snp = 0;
           }
             
        } else if (broken_chromosome.compare(current_chr)!=0){
            broken_chromosome = current_chr;
            cerr << endl << "Contig not found: \"\t" << broken_chromosome << "\t\"" << endl;
            continue;
        } else {
            if (verbose){ cerr << "\rskipping:\t" << current_chr << "\t" << pos << endl; }
            continue;
        }

        // process position
        if (verbose){
           if (num_test && (n_snp >= num_test )){
                continue;
           } else {
                n_snp ++;
           }
          //  clog << endl;
            // the info will be printed later in the `process_snp` routine
        } else {
            clog << "\r" << setfill(' ') << setw(8) << pos+1;
        }
   
        cov->reset(current_chr, chr_ref, pos); // cov->reset(chr_ref, pos);

        if (! process_snp(cov, ref, mapped_file, chr_ref, chr_bam) ){
            if (verbose>1) {
                throw std::logic_error(" error while processing a snp!!! ");
            } else {
                cerr << "  error while processing a snp! skipping... " << endl;
            }
        }
        if (db_flag){
            db->print_cov_db( *cov );
        } else { cov->print_cov(chr_ref, plotFile); }

        cov->estimate( read_length );

        if ( !(n_snp % COMMIT_EACH) && n_snp >0 ){
            clog << " | commiting" << endl;
            db->intermediate_commit();
       }
        // finally:       
        // vcfFile.getline(buffer, buffer_size);
    }
    clog << endl << "VCF file `" << snpfile.c_str() << "` has been successfully processed" << endl;
    vcfFile.close();

    if (db_flag){
        db->place_register_record( );
        db->composite_index();
         // xct->rollback(); // sqlite
        db->commit();
    } else  fclose(plotFile);
}
// Writes the coverage-level metadata into the object definition file
// (_odf): coordinate system, coordinate bounds, domain description and,
// when present, the metadata of the attribute table.
//
// Returns false when the base connector fails to store its metadata. When
// the coordinate system, the envelope or the domain is missing or invalid,
// returns the result of ERROR2(). Returns true otherwise; the result of
// storing the attribute table metadata is not checked.
bool CoverageConnector::storeMetaData(IlwisObject *obj, IlwisTypes type, const DataDefinition& datadef)
{
    bool ok = Ilwis3Connector::storeMetaData(obj, type);
    if ( !ok)
        return false;

    Coverage *coverage = static_cast<Coverage *>(obj);

    const ICoordinateSystem csy = coverage->coordinateSystem();
    if (!csy.isValid())
        return ERROR2(ERR_NO_INITIALIZED_2, "CoordinateSystem", coverage->name());

    // Prefer the local file of the coordinate system; otherwise try to
    // create one from its code.
    QString localName = Resource::toLocalFile(csy->source().url(),true);
    if ( localName == sUNDEF) {
        localName = CoordinateSystemConnector::createCsyFromCode(csy->code());
    }
    if ( localName == sUNDEF) {
        return ERROR2(ERR_NO_INITIALIZED_2, "CoordinateSystem", coverage->name());
    }
    _odf->setKeyValue("BaseMap","CoordSystem", localName);
    Box2D<double> bounds = coverage->envelope();
    if(!bounds.isValid())
        return ERROR2(ERR_NO_INITIALIZED_2, "Bounds", coverage->name());

    // NOTE(review): the second argument of arg() is the field width, so
    // short values are left-padded with blanks -- confirm readers of
    // CoordBounds tolerate repeated separators.
    _odf->setKeyValue("BaseMap","CoordBounds",QString("%1 %2 %3 %4").
                      arg(bounds.min_corner().x(),10,'f').
                      arg(bounds.min_corner().y(),10,'f').
                      arg(bounds.max_corner().x(),10,'f').
                      arg(bounds.max_corner().y(),10,'f'));

    const IDomain dom = datadef.domain();
    if (!dom.isValid())
        return ERROR2(ERR_NO_INITIALIZED_2, "Domain", coverage->name());

    calcStatics(obj,NumericStatistics::pBASIC);
    if ( dom->ilwisType() == itNUMERICDOMAIN) {
        const NumericStatistics& stats = coverage->statistics();
        const int digits = stats.significantDigits();
        const qint32 delta = stats[NumericStatistics::pDELTA];
        // Integer data whose value span is below 256 is stored as a byte
        // map (boolean or image domain).
        if ( delta >= 0 && delta < 256 && digits == 0){
            if ( datadef.domain()->code() == "boolean"){
                QString domInfo = QString("bool.dom;Byte;bool;0;;");
                _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
                _odf->setKeyValue("BaseMap","Range","0:1:offset=-1");
                _odf->setKeyValue("BaseMap","Domain","bool.dom");
            }
            else{
                QString domInfo = QString("Image.dom;Byte;image;0;;");
                _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
                _odf->setKeyValue("BaseMap","Range","0:255:offset=0");
                _odf->setKeyValue("BaseMap","MinMax","0:255");
                _odf->setKeyValue("BaseMap","Domain","Image.dom");
            }
        }
        else {
            // General value map: range, scale and offset come from the
            // statistics via RawConverter.
            RawConverter conv(stats[NumericStatistics::pMIN], stats[NumericStatistics::pMAX],pow(10, - digits));
            QString rangeString = QString("%1:%2:%3:offset=%4").arg(stats[NumericStatistics::pMIN]).arg(stats[NumericStatistics::pMAX]).arg(conv.scale()).arg(conv.offset());
            _odf->setKeyValue("BaseMap","Range",rangeString);
            _odf->setKeyValue("BaseMap","Domain","value.dom");

            _odf->setKeyValue("BaseMap","MinMax",QString("%1:%2").arg(stats[NumericStatistics::pMIN]).arg(stats[NumericStatistics::pMAX]));
            QString domInfo = QString("value.dom;Long;value;0;-9999999.9:9999999.9:0.1:offset=0");
            _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
        }
    } else if ( dom->ilwisType() == itITEMDOMAIN) {
        QString source = Resource::toLocalFile(dom->source().url(), true);
        if ( dom->valueType() == itTHEMATICITEM && coverage->ilwisType() == itRASTER) {
            IThematicDomain themdom = dom.get<ThematicDomain>();
            if ( themdom.isValid()) {
                QString domInfo = QString("%1;Byte;class;%2;;").arg(source).arg(themdom->count());
                _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
                _odf->setKeyValue("BaseMap","Domain",source);
            }
        } else if(dom->valueType() == itINDEXEDITEM) {
            QString domName = _odf->fileinfo().fileName();
            QString domInfo = QString("%1;Long;UniqueID;0;;").arg(domName);
            _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
            _odf->setKeyValue("BaseMap","Domain",domName);
        } else if ( dom->valueType() == itNAMEDITEM) {
            INamedIdDomain iddom = dom.get<NamedIdDomain>();
            QString domName = _odf->fileinfo().fileName();
            // Strip the file extension from the domain name.
            int index;
            if ( (index=domName.lastIndexOf("."))!= -1)             {
                domName = domName.left(index);
            }
            // NOTE(review): unlike the other DomainInfo strings this one
            // has ";;" directly after the name -- confirm against the
            // expected DomainInfo layout.
            QString domInfo = QString("%1;;Int;id;%2;;").arg(domName).arg(iddom->count());
            _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
            _odf->setKeyValue("BaseMap","Domain",domName);
            // The id domain is stored through its own ilwis3 connector.
            iddom->connectTo(QUrl(),"domain","ilwis3", IlwisObject::cmOUTPUT);
            iddom->store(Ilwis::IlwisObject::smMETADATA | Ilwis::IlwisObject::smBINARYDATA);
        }
    }

    ITable attTable = coverage->attributeTable();
    if ( attTable.isValid()) {
        QScopedPointer<TableConnector> conn(createTableConnector(attTable, coverage, type));
        conn->storeMetaData(attTable.ptr());
    }
    return true;
}
예제 #12
0
/*==============================================================================
 * FUNCTION:      processProc
 * OVERVIEW:      Process a procedure, given a native (source machine) address.
 * PARAMETERS:    address - the address at which the procedure starts
 *                delta - the offset of the above address from the logical
 *                  address at which the procedure starts (i.e. the one
 *                  given by dis)
 *                uUpper - the highest address of the text segment
 *                pProc - the procedure object
 *                decoder - NJMCDecoder object
 * RETURNS:       <nothing>
 *============================================================================*/
void processProc(ADDRESS uAddr, ptrdiff_t delta, ADDRESS uUpper, UserProc* pProc,
                 NJMCDecoder& decoder)
{
    PBB pBB;                    // Pointer to the current basic block
    INSTTYPE type;              // Cfg type of instruction (e.g. IRET)

    // Declare a queue of targets not yet processed yet. This has to be
    // individual to the procedure!
    TARGETS targets;

    // Indicates whether or not the next instruction to be decoded is the
    // lexical successor of the current one. Will be true for all NCTs and for
    // CTIs with a fall through branch.
    bool sequentialDecode = true;

    Cfg* pCfg = pProc->getCFG();

    // Initialise the queue of control flow targets that have yet to be decoded.
    targets.push(uAddr);

    // Clear the pointer used by the caller prologue code to access the last
    // call rtl of this procedure
    //decoder.resetLastCall();

    while ((uAddr = nextAddress(targets, pCfg)) != 0)
        {

            // The list of RTLs for the current basic block
            list<HRTL*>* BB_rtls = new list<HRTL*>();

            // Keep decoding sequentially until a CTI without a fall through branch
            // is decoded
            ADDRESS start = uAddr;
            DecodeResult inst;
            while (sequentialDecode)
                {

                    // Decode and classify the current instruction
                    if (progOptions.trace)
                        cout << "*" << hex << uAddr << "\t" << flush;

                    // Decode the inst at uAddr.
                    inst = decoder.decodeInstruction(uAddr, delta, pProc);

                    // Need to construct a new list of RTLs if a basic block has just
                    // been finished but decoding is continuing from its lexical
                    // successor
                    if (BB_rtls == NULL)
                        BB_rtls = new list<HRTL*>();

                    HRTL* pRtl = inst.rtl;
                    if (inst.numBytes == 0)
                        {
                            // An invalid instruction. Most likely because a call did
                            // not return (e.g. call _exit()), etc. Best thing is to
                            // emit a INVALID BB, and continue with valid instructions
                            ostrstream ost;
                            ost << "invalid instruction at " << hex << uAddr;
                            warning(str(ost));
                            // Emit the RTL anyway, so we have the address and maybe
                            // some other clues
                            BB_rtls->push_back(new RTL(uAddr));
                            pBB = pCfg->newBB(BB_rtls, INVALID, 0);
                            sequentialDecode = false;
                            BB_rtls = NULL;
                            continue;
                        }

                    HLJump* rtl_jump = static_cast<HLJump*>(pRtl);

                    // Display RTL representation if asked
                    if (progOptions.rtl) pRtl->print();

                    ADDRESS uDest;

                    switch (pRtl->getKind())
                        {

                        case JUMP_HRTL:
                        {
                            uDest = rtl_jump->getFixedDest();

                            // Handle one way jumps and computed jumps separately
                            if (uDest != NO_ADDRESS)
                                {
                                    BB_rtls->push_back(pRtl);
                                    sequentialDecode = false;

                                    pBB = pCfg->newBB(BB_rtls,ONEWAY,1);

                                    // Exit the switch now and stop decoding sequentially if the
                                    // basic block already existed
                                    if (pBB == 0)
                                        {
                                            sequentialDecode = false;
                                            BB_rtls = NULL;
                                            break;
                                        }

                                    // Add the out edge if it is to a destination within the
                                    // procedure
                                    if (uDest < uUpper)
                                        {
                                            visit(pCfg, uDest, targets, pBB);
                                            pCfg->addOutEdge(pBB, uDest, true);
                                        }
                                    else
                                        {
                                            ostrstream ost;
                                            ost << "Error: Instruction at " << hex << uAddr;
                                            ost << " branches beyond end of section, to ";
                                            ost << uDest;
                                            error(str(ost));
                                        }
                                }
                            break;
                        }

                        case NWAYJUMP_HRTL:
                        {
                            BB_rtls->push_back(pRtl);
                            // We create the BB as a COMPJUMP type, then change
                            // to an NWAY if it turns out to be a switch stmt
                            pBB = pCfg->newBB(BB_rtls, COMPJUMP, 0);
                            if (isSwitch(pBB, rtl_jump->getDest(), pProc, pBF))
                                {
                                    processSwitch(pBB, delta, pCfg, targets, pBF);
                                }
                            else   // Computed jump
                                {
                                    // Not a switch statement
                                    ostrstream ost;
                                    string sKind("JUMP");
                                    if (type == I_COMPCALL) sKind = "CALL";
                                    ost << "COMPUTED " << sKind << " at "
                                        << hex << uAddr << endl;
                                    warning(str(ost));
                                    BB_rtls = NULL;    // New HRTLList for next BB
                                }
                            sequentialDecode = false;
                            break;
                        }



                        case JCOND_HRTL:
                        {
                            uDest = rtl_jump->getFixedDest();
                            BB_rtls->push_back(pRtl);
                            pBB = pCfg->newBB(BB_rtls, TWOWAY, 2);

                            // Stop decoding sequentially if the basic block already existed
                            // otherwise complete the basic block
                            if (pBB == 0)
                                sequentialDecode = false;
                            else
                                {

                                    // Add the out edge if it is to a destination within the
                                    // procedure
                                    if (uDest < uUpper)
                                        {
                                            visit(pCfg, uDest, targets, pBB);
                                            pCfg->addOutEdge(pBB, uDest, true);
                                        }
                                    else
                                        {
                                            ostrstream ost;
                                            ost << "Error: Instruction at " << hex << uAddr;
                                            ost << " branches beyond end of section, to ";
                                            ost << uDest;
                                            error(str(ost));
                                        }

                                    // Add the fall-through outedge
                                    pCfg->addOutEdge(pBB, uAddr + inst.numBytes);
                                }

                            // Create the list of RTLs for the next basic block and continue
                            // with the next instruction.
                            BB_rtls = NULL;
                            break;
                        }

                        case CALL_HRTL:
                        {
                            HLCall* call = static_cast<HLCall*>(pRtl);

                            // Treat computed and static calls seperately
                            if (call->isComputed())
                                {
                                    BB_rtls->push_back(pRtl);
                                    pBB = pCfg->newBB(BB_rtls, COMPCALL, 1);

                                    // Stop decoding sequentially if the basic block already
                                    // existed otherwise complete the basic block
                                    if (pBB == 0)
                                        sequentialDecode = false;
                                    else
                                        pCfg->addOutEdge(pBB, uAddr + inst.numBytes);

                                }
                            else        // Static call
                                {

                                    BB_rtls->push_back(pRtl);

                                    // Find the address of the callee.
                                    ADDRESS uNewAddr = call->getFixedDest();

                                    // Add this non computed call site to the set of call
                                    // sites which need to be analysed later.
                                    pCfg->addCall(call);

                                    // Record the called address as the start of a new
                                    // procedure if it didn't already exist.
                                    if ((uNewAddr != NO_ADDRESS) &&
                                            prog.findProc(uNewAddr) == NULL)
                                        {
                                            prog.visitProc(uNewAddr);
                                            if (progOptions.trace)
                                                cout << "p" << hex << uNewAddr << "\t" << flush;
                                        }

                                    // Check if this is the _exit function. May prevent us from
                                    // attempting to decode invalid instructions.
                                    char* name = prog.pBF->SymbolByAddress(uNewAddr);
                                    if (name && strcmp(name, "_exit") == 0)
                                        {
                                            // Create the new basic block
                                            pBB = pCfg->newBB(BB_rtls, CALL, 0);

                                            // Stop decoding sequentially
                                            sequentialDecode = false;
                                        }
                                    else
                                        {
                                            // Create the new basic block
                                            pBB = pCfg->newBB(BB_rtls, CALL, 1);

                                            if (call->isReturnAfterCall())
                                                {
                                                    // Constuct the RTLs for the new basic block
                                                    list<HRTL*>* rtls = new list<HRTL*>();
                                                    // The only RTL in the basic block is a high level
                                                    // return that doesn't have any RTs.
                                                    rtls->push_back(new HLReturn(0, NULL));

                                                    BasicBlock* returnBB = pCfg->newBB(rtls, RET, 0);
                                                    // Add out edge from call to return
                                                    pCfg->addOutEdge(pBB, returnBB);
                                                    // Put a label on the return BB (since it's an
                                                    // orphan); a jump will be reqd
                                                    pCfg->setLabel(returnBB);
                                                    pBB->setJumpReqd();
                                                    // Give the enclosing proc a dummy callee epilogue
                                                    pProc->setEpilogue(new CalleeEpilogue("__dummy",
                                                                                          list<string>()));
                                                    // Mike: do we need to set return locations?
                                                    // This ends the function
                                                    sequentialDecode = false;
                                                }
                                            else
                                                {
                                                    // Add the fall through edge if the block didn't
                                                    // already exist
                                                    if (pBB != NULL)
                                                        pCfg->addOutEdge(pBB, uAddr + inst.numBytes);
                                                }
                                        }
                                }

                            // Create the list of RTLs for the next basic block and continue
                            // with the next instruction.
                            BB_rtls = NULL;
                            break;
                        }

                        case RET_HRTL:
                            // Stop decoding sequentially
                            sequentialDecode = false;

                            // Add the RTL to the list
                            BB_rtls->push_back(pRtl);
                            // Create the basic block
                            pBB = pCfg->newBB(BB_rtls, RET, 0);

                            // Create the list of RTLs for the next basic block and continue
                            // with the next instruction.
                            BB_rtls = NULL;    // New HRTLList for next BB
                            break;

                        case SCOND_HRTL:
                            // This is just an ordinary instruction; no control transfer
                            // Fall through
                        case LOW_LEVEL_HRTL:
                            // We must emit empty RTLs for NOPs, because they could be the
                            // destinations of jumps (and splitBB won't work)
                            // Just emit the current instr to the current BB
                            BB_rtls->push_back(pRtl);
                            break;

                        } // switch (pRtl->getKind())

                    uAddr += inst.numBytes;
                    // Update the RTL's number of bytes for coverage analysis (only)
                    inst.rtl->updateNumBytes(inst.numBytes);

                    // If sequentially decoding, check if the next address happens to
                    // be the start of an existing BB. If so, finish off the current BB
                    // (if any RTLs) as a fallthrough, and  no need to decode again
                    // (unless it's an incomplete BB, then we do decode it).
                    // In fact, mustn't decode twice, because it will muck up the
                    // coverage, but also will cause subtle problems like add a call
                    // to the list of calls to be processed, then delete the call RTL
                    // (e.g. Pentium 134.perl benchmark)
                    if (sequentialDecode && pCfg->existsBB(uAddr))
                        {
                            // Create the fallthrough BB, if there are any RTLs at all
                            if (BB_rtls)
                                {
                                    PBB pBB = pCfg->newBB(BB_rtls, FALL, 1);
                                    // Add an out edge to this address
                                    if (pBB)
                                        {
                                            pCfg->addOutEdge(pBB, uAddr);
                                            BB_rtls = NULL;         // Need new list of RTLs
                                        }
                                }
                            // Pick a new address to decode from, if the BB is complete
                            if (!pCfg->isIncomplete(uAddr))
                                sequentialDecode = false;
                        }

                }   // while sequentialDecode

            // Add this range to the coverage
            pProc->addRange(start, uAddr);

            // Must set sequentialDecode back to true
            sequentialDecode = true;

        }   // while nextAddress()

    // This pass is to remove up to 3 nops between ranges.
    // These will be assumed to be padding for alignments of BBs
    // Possibly removes a lot of ranges that could otherwise be combined
    ADDRESS a1, a2;
    COV_CIT ii;
    Coverage temp;
    if (pProc->getFirstGap(a1, a2, ii))
        {
            do
                {
                    int gap = a2 - a1;
                    if (gap < 8)
                        {
                            bool allNops = true;
                            for (int i=0; i < gap; i+= 2)
                                {
                                    // Beware endianness! getWord will work properly
                                    if (getWord(a1+i+delta) != 0x4e71)
                                        {
                                            allNops = false;
                                            break;
                                        }
                                }
                            if (allNops)
                                // Remove this gap, by adding a range equal to the gap
                                // Note: it's not safe to add the range now, so we put
                                // the range into a temp Coverage object to be added later
                                temp.addRange(a1, a2);
                        }
                }
            while (pProc->getNextGap(a1, a2, ii));
        }
    // Now add the ranges in temp
    pProc->addRanges(temp);

}
예제 #13
0
int main (int argc, char* argv[])
{
    string gff_file_path, bam_file_path, output_file_path;
    vector<string> stack_file_paths;

    // TODO allow multiple bam files?
    try
    {
        TCLAP::CmdLine cmd("Program description", ' ', VERSION);

        TCLAP::MultiArg<string> inputSTACKS("s", "stack-file", "Stack file", false, "foo.stacks", cmd);
        TCLAP::ValueArg<string> inputGFF("g", "gff-file", "Input GFF file", true, "", "input_file.gff", cmd);
        TCLAP::ValueArg<string> inputBAM("b", "bam-file", "Input BAM file", true, "", "input_file.bam", cmd);
        TCLAP::ValueArg<string> outputFileArg("o", "output", "Output file", true, "", "output.coverage", cmd);

        cmd.parse(argc, argv);

        gff_file_path = inputGFF.getValue();
        bam_file_path = inputBAM.getValue();
        stack_file_paths = inputSTACKS.getValue();
        output_file_path = outputFileArg.getValue();

    } catch (TCLAP::ArgException &e) {
        cerr << "Error: " << e.error() << " " << e.argId() << endl;
    }

    std::ostream* output_stream;
    std::ofstream output_file_stream;

    if (output_file_path == "-")
    {
        cerr << "Outputting to standard out." << endl;
        output_stream = &cout;
    }
    else
    {
        output_file_stream.open(output_file_path.c_str(),
                                std::ios::out | std::ios::trunc);

        if (!output_file_stream.is_open())
        {
            cerr << "Error opening output file. Exiting." << endl;
            return 0;
        }
        output_stream = &output_file_stream;
    }

    BamReader reader;
    if(!reader.Open(bam_file_path))
    {
        cerr << "Error opening the bam file. Exiting." << endl;
        return 0;
    }

    cerr << "Loading the reference GFF." << endl;

    // open GFF reference file
    std::ifstream gff_stream(gff_file_path.c_str());
    if (!gff_stream.is_open())
    {
        cerr << "Error opening reference GFF file. Exiting." << endl;
        return 0;
    }

    cerr << "Loading splice junctions from reference GFF." << endl;

    UniquePositionIndex junctions;
    ChildrenIndex exon_index;
    GFFReader gff_reader(gff_stream);
    Feature f;

    while (gff_reader.read(f))
    {
        if (f.isExonType())
        {
            exon_index.add(f);
        }
    }

    vector<string> IDs;
    exon_index.parentIDs(IDs);

    for (vector<string>::iterator ID = IDs.begin(); ID != IDs.end(); ++ID)
    {
        vector<Feature> exons;
        vector<Feature> juncs;
        exon_index.childrenOf(*ID, exons);

        getJunctions(exons, juncs);
        // TODO it's wasteful to have a juncs vector that just gets moved to the index
        //      it'd be nicer if the index implemented the same interface as the vector
        //      this means giving indexes iterators?
        //      c++ you're so complicated...

        for (vector<Feature>::iterator junc = juncs.begin(); junc != juncs.end(); ++junc)
        {
            junctions.add(*junc);
        }
    }

    cerr << "Loading splice junctions from stack files." << endl;

    // load splice junctions from stack files
    for (vector<string>::iterator it = stack_file_paths.begin();
         it != stack_file_paths.end(); ++it)
    {
        std::ifstream stack_stream(it->c_str());
        if (!stack_stream.is_open())
        {
            cerr << "Error opening stack file: " << *it << endl;
            cerr << "Skipping file." << endl;
        }
        else
        {
            StackReader stack_reader(stack_stream);
            Feature j;
            while (stack_reader.read(j))
            {
                junctions.add(j);
            }
        }
    }

    cerr << "Found " << junctions.count();
    cerr << " unique splice junctions." << endl;

    Coverage coverage;

    cerr << "Reading alignments and building coverage." << endl;

    // initialize references
    BamTools::RefVector ref_vec = reader.GetReferenceData();
    for (int i = 0; i < ref_vec.size(); ++i)
    {
        BamTools::RefData data = ref_vec.at(i);
        coverage.setMinReferenceLength(data.RefName, data.RefLength);
    }

    // read and filter alignments, adding to coverages
    Alignment al, mate;
    Feature junction;
    while (reader.GetNextAlignment(al))
    {
        if (al.IsPaired())
        {
            bool valid = true;
            if (al.getJunction(junction))
                valid = junctions.contains(junction);

            reader.GetNextAlignment(mate);

            if (mate.getJunction(junction))
                valid = valid && junctions.contains(junction);

            if (valid)
            {
                coverage.add(al);
                coverage.add(mate);
            }
        }
        else
        {
            if (al.getJunction(junction))
                coverage.add(al);
            else
                coverage.add(al);
        }
    }

    reader.Close();

    cerr << "Writing coverage file." << endl;

    coverage.toOutputStream(*output_stream);

    return 0;
}