void CoverageView::refresh() { clear(); if (!_data || !_activeItem) return; ProfileContext::Type t = _activeItem->type(); TraceFunction* f = 0; if (t == ProfileContext::Function) f = (TraceFunction*) _activeItem; if (t == ProfileContext::FunctionCycle) f = (TraceFunction*) _activeItem; if (!f) return; _hc.clear(GlobalConfig::maxListCount()); SubCost realSum = f->inclusive()->subCost(_eventType); TraceFunctionList l; if (_showCallers) l = Coverage::coverage(f, Coverage::Caller, _eventType); else l = Coverage::coverage(f, Coverage::Called, _eventType); foreach(TraceFunction* f2, l) { Coverage* c = (Coverage*) f2->association(Coverage::Rtti); if (c && (c->inclusive()>0.0)) _hc.addCost(f2, SubCost(realSum * c->inclusive())); }
// Adding a spliced alignment must only count the aligned ('M') segments;
// the skipped ('N') segment in the middle contributes no coverage.
TEST(CoverageTest, Coverage_add_alignment)
{
    // The three CIGAR operations of the read: 2M 3N 2M.
    CigarOp leading_match;
    leading_match.Type = 'M';
    leading_match.Length = 2;

    CigarOp skipped_region;
    skipped_region.Type = 'N';
    skipped_region.Length = 3;

    CigarOp trailing_match;
    trailing_match.Type = 'M';
    trailing_match.Length = 2;

    // A read on reference "foo" placed at position 3.
    Alignment read;
    read.RefName = "foo";
    read.position(3);
    read.CigarData.clear();
    read.CigarData.push_back(leading_match);
    read.CigarData.push_back(skipped_region);
    read.CigarData.push_back(trailing_match);

    Coverage coverage;
    coverage.add(read);

    // Two covered bases, three uncovered (skipped) bases, two covered bases.
    EXPECT_THAT(coverage.coverages.find("foo")->second,
                ElementsAre(0, 0, 1, 1, 0, 0, 0, 1, 1));
}
// Loading the textual coverage format must restore one coverage vector per
// reference, each with the values listed after its "<name>\t<length>" header.
TEST(CoverageTest, load)
{
    // Two references: "bar" with six values, "foo" with five values.
    const std::string serialized = "bar\t6\n1\n1\n1\n0\n0\n0\nfoo\t5\n0\n0\n1\n1\n0\n";
    std::stringstream input(serialized);

    Coverage loaded;
    loaded.load(input);

    EXPECT_THAT(loaded.coverages.find("bar")->second,
                ElementsAre(1, 1, 1, 0, 0, 0));
    EXPECT_THAT(loaded.coverages.find("foo")->second,
                ElementsAre(0, 0, 1, 1, 0));
}
/**
 * Stores the binary data of the coverage's attribute table.
 *
 * @param obj  object to store; it is treated as a Coverage.
 * @param tp   type flags handed on to createTableConnector().
 * @return the result of the table connector's storeBinaryData(), or false
 *         when the coverage has no valid attribute table.
 */
bool CoverageConnector::storeBinaryData(IlwisObject *obj, IlwisTypes tp)
{
    Coverage *cov = static_cast<Coverage *>(obj);
    ITable attributes = cov->attributeTable();

    // Nothing to write when there is no valid attribute table.
    if (!attributes.isValid())
        return false;

    // The scoped pointer releases the connector when this function returns.
    QScopedPointer<TableConnector> tableConnector(createTableConnector(attributes, cov, tp));
    return tableConnector->storeBinaryData(attributes.ptr());
}
void CoverageView::refresh() { clear(); setColumnWidth(0, 50); if (!_showCallers) setColumnWidth(1, 50); if (!_data || !_activeItem) return; TraceItem::CostType t = _activeItem->type(); TraceFunction* f = 0; if (t == TraceItem::Function) f = (TraceFunction*) _activeItem; if (t == TraceItem::FunctionCycle) f = (TraceFunction*) _activeItem; if (!f) return; TraceFunction* ff; TraceFunctionList l; _hc.clear(Configuration::maxListCount()); SubCost realSum = f->inclusive()->subCost(_costType); if (_showCallers) l = Coverage::coverage(f, Coverage::Caller, _costType); else l = Coverage::coverage(f, Coverage::Called, _costType); for (ff=l.first();ff;ff=l.next()) { Coverage* c = (Coverage*) ff->assoziation(Coverage::Rtti); if (c && (c->inclusive()>0.0)) _hc.addCost(ff, SubCost(realSum * c->inclusive())); } for(int i=0;i<_hc.realCount();i++) { ff = (TraceFunction*) _hc[i]; Coverage* c = (Coverage*) ff->assoziation(Coverage::Rtti); if (_showCallers) new CallerCoverageItem(this, c, f, _costType, _groupType); else new CalleeCoverageItem(this, c, f, _costType, _groupType); } if (_hc.hasMore()) { // a placeholder for all the functions skipped ... ff = (TraceFunction*) _hc[_hc.maxSize()-1]; Coverage* c = (Coverage*) ff->assoziation(Coverage::Rtti); if (_showCallers) new CallerCoverageItem(this, _hc.count() - _hc.maxSize(), c, f, _costType, _groupType); else new CalleeCoverageItem(this, _hc.count() - _hc.maxSize(), c, f, _costType, _groupType); } }
// setMinReferenceLength() must create a zero-filled coverage vector of the
// requested length, and must never shrink an existing one.
TEST(CoverageTest, Coverage_setMinReferenceLength)
{
    Coverage cov;

    // A fresh object tracks no references.
    EXPECT_EQ(0, cov.coverages.size());

    // First request: the reference appears with ten zeroed positions.
    cov.setMinReferenceLength("foo", 10);
    EXPECT_EQ(1, cov.coverages.size());
    EXPECT_THAT(cov.coverages.find("foo")->second,
                ElementsAre(0, 0, 0, 0, 0, 0, 0, 0, 0, 0));

    // A smaller minimum leaves the ten positions untouched.
    cov.setMinReferenceLength("foo", 5);
    EXPECT_THAT(cov.coverages.find("foo")->second,
                ElementsAre(0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
}
/**
 * Reads the "BaseMap" section of the object definition file into a Coverage:
 * coordinate system, optional attribute table and coordinate bounds.
 *
 * @param data  object to fill; it is treated as a Coverage.
 * @return false when neither the named coordinate system nor the 'unknown'
 *         fallback can be prepared, or when a required attribute table is
 *         invalid; true otherwise (unparsable bounds only log a warning).
 */
bool CoverageConnector::loadMetaData(Ilwis::IlwisObject *data)
{
    Ilwis3Connector::loadMetaData(data);

    Coverage *target = static_cast<Coverage *>(data);

    // --- coordinate system -------------------------------------------------
    QString csyName = _odf->value("BaseMap","CoordSystem");
    if (csyName.toLower() == "latlonwgs84.csy") {
        // This file name is replaced by an EPSG code lookup.
        csyName = "code=epsg:4326";
    }

    ICoordinateSystem csy;
    if (!csy.prepare(csyName)) {
        kernel()->issues()->log(csyName,
                                TR("Coordinate system couldnt be initialized, defaulting to 'unknown'"),
                                IssueObject::itWarning);
        QString fallback = QString("ilwis://file/unknown.csy");
        if (!csy.prepare(fallback)) {
            kernel()->issues()->log(TR("Fallback to 'unknown failed', corrupt system files defintion"));
            return false;
        }
    }
    target->setCoordinateSystem(csy);

    // --- attribute table ---------------------------------------------------
    QString attfile = _odf->value("BaseMap", "AttributeTable");
    QString basemaptype = _odf->value("BaseMap", "Type");
    // feature coverages always have an attribute table; rasters might have
    const bool expectsTable = basemaptype != "Map" || attfile != sUNDEF;
    if (expectsTable) {
        ITable attTable = prepareAttributeTable(attfile, basemaptype);
        if (!attTable.isValid())
            return false;
        target->attributeTable(attTable);
    }

    // --- coordinate bounds: "minx miny maxx maxy" ---------------------------
    QString cbounds = _odf->value("BaseMap","CoordBounds");
    QStringList parts = cbounds.split(" ");
    if (parts.size() != 4) {
        kernel()->issues()->log(TR(ERR_INVALID_PROPERTY_FOR_2).arg("Coordinate boundaries", data->name()),
                                IssueObject::itWarning);
    } else {
        double minx = parts[0].toDouble();
        double miny = parts[1].toDouble();
        double maxx = parts[2].toDouble();
        double maxy = parts[3].toDouble();
        Box2D<double> env(Coordinate(minx, miny), Coordinate(maxx, maxy));
        target->envelope(env);
    }

    return true;
}
// Both serialisation entry points (stream and string) must produce the same
// text.
TEST(CoverageTest, toString)
{
    // Note that references are output in sorted order on reference name
    const std::string expected = "bar\t6\n1\n1\n1\n0\n0\n0\nfoo\t5\n0\n0\n1\n1\n0\n";

    Coverage cov;
    cov.setMinReferenceLength("foo", 5);
    cov.setMinReferenceLength("bar", 6);
    cov.add("foo", 3, 2);
    cov.add("bar", 1, 3);

    // Stream-based output.
    std::stringstream streamed;
    cov.toOutputStream(streamed);
    EXPECT_EQ(expected, streamed.str());

    // String-based output.
    std::string as_string;
    cov.toString(as_string);
    EXPECT_EQ(expected, as_string);
}
// add(name, position, length) must grow the coverage vector on demand and
// accumulate counts for overlapping regions.
TEST(CoverageTest, Coverage_add)
{
    Coverage cov;

    // First region creates the reference and covers three positions.
    cov.add("foo", 2, 3);
    EXPECT_THAT(cov.coverages.find("foo")->second,
                ElementsAre(0, 1, 1, 1));

    // An overlapping region increments the shared positions.
    cov.add("foo", 2, 2);
    EXPECT_THAT(cov.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1));

    // A region past the current end extends the vector.
    cov.add("foo", 6, 2);
    EXPECT_THAT(cov.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1, 0, 1, 1));

    // Raising the minimum length pads with zeros and keeps existing counts.
    cov.setMinReferenceLength("foo", 10);
    EXPECT_THAT(cov.coverages.find("foo")->second,
                ElementsAre(0, 2, 2, 1, 0, 1, 1, 0, 0, 0));
}
void ParseSNP::parseVCF() { size_t COMMIT_EACH = 200; bool IMMEDIATE = false; ifstream vcfFile; vcfFile.open(snpfile.c_str(), ifstream::in); if (!vcfFile.good()) { clog << "SNP Parser: could not open file: " << snpfile.c_str() << endl; exit(0); } clog << "reading the VCF file" << endl; // vcfFile.getline(buffer, buffer_size); vector<int> tmp; string broken_chromosome = ""; // snps.resize(chromosome_vector.size(), tmp); Parser * mapped_file = 0; FastaParser * fasta = new FastaParser(reffile); string ref; FILE * plotFile = NULL; // sqlite3pp::transaction * xct; bool transaction_flag = false; if (db_flag){ clog << "writing to database: " << plot_file.c_str() << endl; db = new SqliteDb( plot_file.c_str() , verbose ); db->init_sql_table( sample_label ); db->init_register_table(); db->place_register_record_begin( snpfile , read_filename); db->init_contig_table(); if (!db) { throw std::range_error("null pointer to the database"); } else { clog<< "table has been successfully initialized\t" << db << endl;} ; } else { remove(plot_file.c_str()); plotFile = fopen(plot_file.c_str(), "a"); if (plotFile == NULL) { cerr << "Error in printing: The file or path that you set " // << output.c_str() << " is not valid. It can be that there is no disc space available." 
<< endl; ios_base::failure("cannot open output file for writing!"); exit(0); } }; if (read_filename.find(".bam") != string::npos) { mapped_file = new BamParser(read_filename); } clog << "======================================" << endl; clog << mapped_file->get_header() << endl; clog << "======================================" << endl; // clog << "num of chr " << genome.size() << endl; // clog << "first chr size " << genome[0].size() << endl; clog << "`range` has been set to: " << range << endl; size_t chr_bam = 0; size_t temp_chr_ref = NA; size_t chr_ref = NA; int pos = 0; // int n_snp = 0; // <- global // ref = fasta->getChr(chr_ref); Coverage * cov; cov = new Coverage(range); while (!vcfFile.eof()) { // vcfFile.getline(buffer, buffer_size); safeGetline(vcfFile , buffer, buffer_size); if (buffer[0] == '#'){ if (verbose){ clog << "skipping comment" << endl ; std::string s( buffer ); if (s.find_last_of("\r") > 0){ clog << "this file contains Windows / MacOS specific end-of-line character \'\\r\'\n" << \ "@ position:\t" << s.find_last_of("\r") << "\n" << \ "consider running `dos2unix` or similar!" << endl; } } continue; } int field_count = 0; string current_chr; // read vcf file: // `i` -- runs for symbols // `field_count` -- runs for fields for (size_t i = 0; field_count< 2 && i < buffer_size && buffer[i] != '\0' && buffer[i] != '\n'; i++) { if (field_count == 0 && buffer[i] != '\t') { current_chr += buffer[i]; } if (field_count == 1 && buffer[i - 1] == '\t') { //start: pos column pos = atoi(&buffer[i]) - 1; break; } // end: pos column if (buffer[i] == '\t') { field_count++; } } // process chromosome: if (verbose) { clog << "reading\t" << current_chr.c_str() << "\t" << pos << endl;}; if ( chromosome_vector.count(current_chr.c_str()) > 0){ //found temp_chr_ref = chromosome_vector[current_chr.c_str()]; if (temp_chr_ref != chr_ref) { // new chromosome chr_ref = chromosome_vector[current_chr.c_str()]; clog << endl; // "; getting next bam chromosome..." 
<< endl; chr_bam = mapped_file->GetReferenceID( current_chr); if (chr_ref != chr_bam ){ cerr << "contig: " << current_chr << "; [fasta#:] " << chr_ref << "; [bam#:] " << chr_bam << "; mismatch!" << endl; } if (verbose) { cout << endl; cout << "contig [fasta#:]\t " << chr_ref + 1 \ << "\t[bam#:]\t" << chr_bam + 1 \ << "\t[vcf:]\t" << current_chr.c_str() << "\t[fasta:]\t " << fasta->contig_name[chr_ref] << endl; } else { clog << "contig # " << chr_ref+1 << endl; } ref = fasta->getChr(chr_ref); if (db_flag) { if (transaction_flag){ clog << "commiting" << endl; if (!db) { throw std::range_error("null pointer to the database"); }; db->intermediate_commit(); } else { if (!db) { throw std::range_error("null pointer to the database"); }; db->new_transaction(); transaction_flag = true; } db->place_contig_table_record( chr_ref, current_chr); } n_snp = 0; } } else if (broken_chromosome.compare(current_chr)!=0){ broken_chromosome = current_chr; cerr << endl << "Contig not found: \"\t" << broken_chromosome << "\t\"" << endl; continue; } else { if (verbose){ cerr << "\rskipping:\t" << current_chr << "\t" << pos << endl; } continue; } // process position if (verbose){ if (num_test && (n_snp >= num_test )){ continue; } else { n_snp ++; } // clog << endl; // the info will be printed later in the `process_snp` routine } else { clog << "\r" << setfill(' ') << setw(8) << pos+1; } cov->reset(current_chr, chr_ref, pos); // cov->reset(chr_ref, pos); if (! process_snp(cov, ref, mapped_file, chr_ref, chr_bam) ){ if (verbose>1) { throw std::logic_error(" error while processing a snp!!! "); } else { cerr << " error while processing a snp! skipping... 
" << endl; } } if (db_flag){ db->print_cov_db( *cov ); } else { cov->print_cov(chr_ref, plotFile); } cov->estimate( read_length ); if ( !(n_snp % COMMIT_EACH) && n_snp >0 ){ clog << " | commiting" << endl; db->intermediate_commit(); } // finally: // vcfFile.getline(buffer, buffer_size); } clog << endl << "VCF file `" << snpfile.c_str() << "` has been successfully processed" << endl; vcfFile.close(); if (db_flag){ db->place_register_record( ); db->composite_index(); // xct->rollback(); // sqlite db->commit(); } else fclose(plotFile); }
/**
 * Writes the "BaseMap" section of the object definition file for a coverage:
 * coordinate system, coordinate bounds, domain description and, when present,
 * the metadata of the attribute table.
 *
 * @param obj      object to store; it is treated as a Coverage.
 * @param type     type flags passed to the base class and to the table connector.
 * @param datadef  data definition whose domain decides the "Domain",
 *                 "DomainInfo", "Range" and "MinMax" entries.
 * @return false when the base class fails; the value of ERROR2(...) when the
 *         coordinate system, bounds or domain is missing; true otherwise.
 */
bool CoverageConnector::storeMetaData(IlwisObject *obj, IlwisTypes type, const DataDefinition& datadef)
{
    if (!Ilwis3Connector::storeMetaData(obj, type))
        return false;

    Coverage *coverage = static_cast<Coverage *>(obj);

    // --- coordinate system -------------------------------------------------
    const ICoordinateSystem csy = coverage->coordinateSystem();
    if (!csy.isValid())
        return ERROR2(ERR_NO_INITIALIZED_2, "CoordinateSystem", coverage->name());

    QString csyFile = Resource::toLocalFile(csy->source().url(),true);
    if (csyFile == sUNDEF) {
        // No local file behind the resource: try to create one from its code.
        csyFile = CoordinateSystemConnector::createCsyFromCode(csy->code());
        if (csyFile == sUNDEF)
            return ERROR2(ERR_NO_INITIALIZED_2, "CoordinateSystem", coverage->name());
    }
    _odf->setKeyValue("BaseMap","CoordSystem", csyFile);

    // --- bounds: "minx miny maxx maxy", fixed notation, 10 decimals --------
    Box2D<double> bounds = coverage->envelope();
    if (!bounds.isValid())
        return ERROR2(ERR_NO_INITIALIZED_2, "Bounds", coverage->name());

    _odf->setKeyValue("BaseMap","CoordBounds",
                      QString("%1 %2 %3 %4")
                          .arg(bounds.min_corner().x(),10,'f')
                          .arg(bounds.min_corner().y(),10,'f')
                          .arg(bounds.max_corner().x(),10,'f')
                          .arg(bounds.max_corner().y(),10,'f'));

    // --- domain ------------------------------------------------------------
    const IDomain dom = datadef.domain();
    if (!dom.isValid())
        return ERROR2(ERR_NO_INITIALIZED_2, "Domain", coverage->name());

    calcStatics(obj,NumericStatistics::pBASIC);

    if (dom->ilwisType() == itNUMERICDOMAIN) {
        quint16 significant = coverage->statistics().significantDigits();
        qint32 delta = coverage->statistics()[NumericStatistics::pDELTA];

        // Whole numbers spanning fewer than 256 values are written as bytes.
        const bool byteSized = delta >= 0 && delta < 256 && significant == 0;
        if (byteSized) {
            if (datadef.domain()->code() == "boolean") {
                QString domInfo = QString("bool.dom;Byte;bool;0;;");
                _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
                _odf->setKeyValue("BaseMap","Range","0:1:offset=-1");
                _odf->setKeyValue("BaseMap","Domain","bool.dom");
            } else {
                QString domInfo = QString("Image.dom;Byte;image;0;;");
                _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
                _odf->setKeyValue("BaseMap","Range","0:255:offset=0");
                _odf->setKeyValue("BaseMap","MinMax","0:255");
                _odf->setKeyValue("BaseMap","Domain","Image.dom");
            }
        } else {
            // General value domain: range, scale and offset come from the
            // statistics via a RawConverter.
            const NumericStatistics& stats = coverage->statistics();
            int precision = stats.significantDigits();
            RawConverter conv(stats[NumericStatistics::pMIN], stats[NumericStatistics::pMAX],pow(10, - precision));
            QString rangeString = QString("%1:%2:%3:offset=%4")
                                      .arg(stats[NumericStatistics::pMIN])
                                      .arg(stats[NumericStatistics::pMAX])
                                      .arg(conv.scale())
                                      .arg(conv.offset());
            _odf->setKeyValue("BaseMap","Range",rangeString);
            _odf->setKeyValue("BaseMap","Domain","value.dom");
            _odf->setKeyValue("BaseMap","MinMax",
                              QString("%1:%2")
                                  .arg(stats[NumericStatistics::pMIN])
                                  .arg(stats[NumericStatistics::pMAX]));
            QString domInfo = QString("value.dom;Long;value;0;-9999999.9:9999999.9:0.1:offset=0");
            _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
        }
    }

    if (dom->ilwisType() == itITEMDOMAIN) {
        QString source = Resource::toLocalFile(dom->source().url(), true);

        if (dom->valueType() == itTHEMATICITEM && coverage->ilwisType() == itRASTER) {
            IThematicDomain themdom = dom.get<ThematicDomain>();
            if (themdom.isValid()) {
                QString domInfo = QString("%1;Byte;class;%2;;").arg(source).arg(themdom->count());
                _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
                _odf->setKeyValue("BaseMap","Domain",source);
            }
        } else if (dom->valueType() == itINDEXEDITEM) {
            // The domain entry is the file name of the definition file itself.
            QString domName = _odf->fileinfo().fileName();
            QString domInfo = QString("%1;Long;UniqueID;0;;").arg(domName);
            _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
            _odf->setKeyValue("BaseMap","Domain",domName);
        } else if (dom->valueType() == itNAMEDITEM) {
            INamedIdDomain iddom = dom.get<NamedIdDomain>();

            // Definition file name with everything from the last '.' removed.
            QString domName = _odf->fileinfo().fileName();
            const int dot = domName.lastIndexOf(".");
            if (dot != -1)
                domName = domName.left(dot);

            QString domInfo = QString("%1;;Int;id;%2;;").arg(domName).arg(iddom->count());
            _odf->setKeyValue("BaseMap","DomainInfo",domInfo);
            _odf->setKeyValue("BaseMap","Domain",domName);

            // The id domain is connected for output and stored as well.
            iddom->connectTo(QUrl(),"domain","ilwis3", IlwisObject::cmOUTPUT);
            iddom->store(Ilwis::IlwisObject::smMETADATA | Ilwis::IlwisObject::smBINARYDATA);
        }
    }

    // --- attribute table ---------------------------------------------------
    ITable attTable = coverage->attributeTable();
    if (attTable.isValid()) {
        QScopedPointer<TableConnector> conn(createTableConnector(attTable, coverage, type));
        conn->storeMetaData(attTable.ptr());
    }

    return true;
}
/*============================================================================== * FUNCTION: processProc * OVERVIEW: Process a procedure, given a native (source machine) address. * PARAMETERS: address - the address at which the procedure starts * delta - the offset of the above address from the logical * address at which the procedure starts (i.e. the one * given by dis) * uUpper - the highest address of the text segment * pProc - the procedure object * decoder - NJMCDecoder object * RETURNS: <nothing> *============================================================================*/ void processProc(ADDRESS uAddr, ptrdiff_t delta, ADDRESS uUpper, UserProc* pProc, NJMCDecoder& decoder) { PBB pBB; // Pointer to the current basic block INSTTYPE type; // Cfg type of instruction (e.g. IRET) // Declare a queue of targets not yet processed yet. This has to be // individual to the procedure! TARGETS targets; // Indicates whether or not the next instruction to be decoded is the // lexical successor of the current one. Will be true for all NCTs and for // CTIs with a fall through branch. bool sequentialDecode = true; Cfg* pCfg = pProc->getCFG(); // Initialise the queue of control flow targets that have yet to be decoded. targets.push(uAddr); // Clear the pointer used by the caller prologue code to access the last // call rtl of this procedure //decoder.resetLastCall(); while ((uAddr = nextAddress(targets, pCfg)) != 0) { // The list of RTLs for the current basic block list<HRTL*>* BB_rtls = new list<HRTL*>(); // Keep decoding sequentially until a CTI without a fall through branch // is decoded ADDRESS start = uAddr; DecodeResult inst; while (sequentialDecode) { // Decode and classify the current instruction if (progOptions.trace) cout << "*" << hex << uAddr << "\t" << flush; // Decode the inst at uAddr. 
inst = decoder.decodeInstruction(uAddr, delta, pProc); // Need to construct a new list of RTLs if a basic block has just // been finished but decoding is continuing from its lexical // successor if (BB_rtls == NULL) BB_rtls = new list<HRTL*>(); HRTL* pRtl = inst.rtl; if (inst.numBytes == 0) { // An invalid instruction. Most likely because a call did // not return (e.g. call _exit()), etc. Best thing is to // emit a INVALID BB, and continue with valid instructions ostrstream ost; ost << "invalid instruction at " << hex << uAddr; warning(str(ost)); // Emit the RTL anyway, so we have the address and maybe // some other clues BB_rtls->push_back(new RTL(uAddr)); pBB = pCfg->newBB(BB_rtls, INVALID, 0); sequentialDecode = false; BB_rtls = NULL; continue; } HLJump* rtl_jump = static_cast<HLJump*>(pRtl); // Display RTL representation if asked if (progOptions.rtl) pRtl->print(); ADDRESS uDest; switch (pRtl->getKind()) { case JUMP_HRTL: { uDest = rtl_jump->getFixedDest(); // Handle one way jumps and computed jumps separately if (uDest != NO_ADDRESS) { BB_rtls->push_back(pRtl); sequentialDecode = false; pBB = pCfg->newBB(BB_rtls,ONEWAY,1); // Exit the switch now and stop decoding sequentially if the // basic block already existed if (pBB == 0) { sequentialDecode = false; BB_rtls = NULL; break; } // Add the out edge if it is to a destination within the // procedure if (uDest < uUpper) { visit(pCfg, uDest, targets, pBB); pCfg->addOutEdge(pBB, uDest, true); } else { ostrstream ost; ost << "Error: Instruction at " << hex << uAddr; ost << " branches beyond end of section, to "; ost << uDest; error(str(ost)); } } break; } case NWAYJUMP_HRTL: { BB_rtls->push_back(pRtl); // We create the BB as a COMPJUMP type, then change // to an NWAY if it turns out to be a switch stmt pBB = pCfg->newBB(BB_rtls, COMPJUMP, 0); if (isSwitch(pBB, rtl_jump->getDest(), pProc, pBF)) { processSwitch(pBB, delta, pCfg, targets, pBF); } else // Computed jump { // Not a switch statement ostrstream ost; string 
sKind("JUMP"); if (type == I_COMPCALL) sKind = "CALL"; ost << "COMPUTED " << sKind << " at " << hex << uAddr << endl; warning(str(ost)); BB_rtls = NULL; // New HRTLList for next BB } sequentialDecode = false; break; } case JCOND_HRTL: { uDest = rtl_jump->getFixedDest(); BB_rtls->push_back(pRtl); pBB = pCfg->newBB(BB_rtls, TWOWAY, 2); // Stop decoding sequentially if the basic block already existed // otherwise complete the basic block if (pBB == 0) sequentialDecode = false; else { // Add the out edge if it is to a destination within the // procedure if (uDest < uUpper) { visit(pCfg, uDest, targets, pBB); pCfg->addOutEdge(pBB, uDest, true); } else { ostrstream ost; ost << "Error: Instruction at " << hex << uAddr; ost << " branches beyond end of section, to "; ost << uDest; error(str(ost)); } // Add the fall-through outedge pCfg->addOutEdge(pBB, uAddr + inst.numBytes); } // Create the list of RTLs for the next basic block and continue // with the next instruction. BB_rtls = NULL; break; } case CALL_HRTL: { HLCall* call = static_cast<HLCall*>(pRtl); // Treat computed and static calls seperately if (call->isComputed()) { BB_rtls->push_back(pRtl); pBB = pCfg->newBB(BB_rtls, COMPCALL, 1); // Stop decoding sequentially if the basic block already // existed otherwise complete the basic block if (pBB == 0) sequentialDecode = false; else pCfg->addOutEdge(pBB, uAddr + inst.numBytes); } else // Static call { BB_rtls->push_back(pRtl); // Find the address of the callee. ADDRESS uNewAddr = call->getFixedDest(); // Add this non computed call site to the set of call // sites which need to be analysed later. pCfg->addCall(call); // Record the called address as the start of a new // procedure if it didn't already exist. if ((uNewAddr != NO_ADDRESS) && prog.findProc(uNewAddr) == NULL) { prog.visitProc(uNewAddr); if (progOptions.trace) cout << "p" << hex << uNewAddr << "\t" << flush; } // Check if this is the _exit function. 
May prevent us from // attempting to decode invalid instructions. char* name = prog.pBF->SymbolByAddress(uNewAddr); if (name && strcmp(name, "_exit") == 0) { // Create the new basic block pBB = pCfg->newBB(BB_rtls, CALL, 0); // Stop decoding sequentially sequentialDecode = false; } else { // Create the new basic block pBB = pCfg->newBB(BB_rtls, CALL, 1); if (call->isReturnAfterCall()) { // Constuct the RTLs for the new basic block list<HRTL*>* rtls = new list<HRTL*>(); // The only RTL in the basic block is a high level // return that doesn't have any RTs. rtls->push_back(new HLReturn(0, NULL)); BasicBlock* returnBB = pCfg->newBB(rtls, RET, 0); // Add out edge from call to return pCfg->addOutEdge(pBB, returnBB); // Put a label on the return BB (since it's an // orphan); a jump will be reqd pCfg->setLabel(returnBB); pBB->setJumpReqd(); // Give the enclosing proc a dummy callee epilogue pProc->setEpilogue(new CalleeEpilogue("__dummy", list<string>())); // Mike: do we need to set return locations? // This ends the function sequentialDecode = false; } else { // Add the fall through edge if the block didn't // already exist if (pBB != NULL) pCfg->addOutEdge(pBB, uAddr + inst.numBytes); } } } // Create the list of RTLs for the next basic block and continue // with the next instruction. BB_rtls = NULL; break; } case RET_HRTL: // Stop decoding sequentially sequentialDecode = false; // Add the RTL to the list BB_rtls->push_back(pRtl); // Create the basic block pBB = pCfg->newBB(BB_rtls, RET, 0); // Create the list of RTLs for the next basic block and continue // with the next instruction. 
BB_rtls = NULL; // New HRTLList for next BB break; case SCOND_HRTL: // This is just an ordinary instruction; no control transfer // Fall through case LOW_LEVEL_HRTL: // We must emit empty RTLs for NOPs, because they could be the // destinations of jumps (and splitBB won't work) // Just emit the current instr to the current BB BB_rtls->push_back(pRtl); break; } // switch (pRtl->getKind()) uAddr += inst.numBytes; // Update the RTL's number of bytes for coverage analysis (only) inst.rtl->updateNumBytes(inst.numBytes); // If sequentially decoding, check if the next address happens to // be the start of an existing BB. If so, finish off the current BB // (if any RTLs) as a fallthrough, and no need to decode again // (unless it's an incomplete BB, then we do decode it). // In fact, mustn't decode twice, because it will muck up the // coverage, but also will cause subtle problems like add a call // to the list of calls to be processed, then delete the call RTL // (e.g. Pentium 134.perl benchmark) if (sequentialDecode && pCfg->existsBB(uAddr)) { // Create the fallthrough BB, if there are any RTLs at all if (BB_rtls) { PBB pBB = pCfg->newBB(BB_rtls, FALL, 1); // Add an out edge to this address if (pBB) { pCfg->addOutEdge(pBB, uAddr); BB_rtls = NULL; // Need new list of RTLs } } // Pick a new address to decode from, if the BB is complete if (!pCfg->isIncomplete(uAddr)) sequentialDecode = false; } } // while sequentialDecode // Add this range to the coverage pProc->addRange(start, uAddr); // Must set sequentialDecode back to true sequentialDecode = true; } // while nextAddress() // This pass is to remove up to 3 nops between ranges. // These will be assumed to be padding for alignments of BBs // Possibly removes a lot of ranges that could otherwise be combined ADDRESS a1, a2; COV_CIT ii; Coverage temp; if (pProc->getFirstGap(a1, a2, ii)) { do { int gap = a2 - a1; if (gap < 8) { bool allNops = true; for (int i=0; i < gap; i+= 2) { // Beware endianness! 
getWord will work properly if (getWord(a1+i+delta) != 0x4e71) { allNops = false; break; } } if (allNops) // Remove this gap, by adding a range equal to the gap // Note: it's not safe to add the range now, so we put // the range into a temp Coverage object to be added later temp.addRange(a1, a2); } } while (pProc->getNextGap(a1, a2, ii)); } // Now add the ranges in temp pProc->addRanges(temp); }
int main (int argc, char* argv[]) { string gff_file_path, bam_file_path, output_file_path; vector<string> stack_file_paths; // TODO allow multiple bam files? try { TCLAP::CmdLine cmd("Program description", ' ', VERSION); TCLAP::MultiArg<string> inputSTACKS("s", "stack-file", "Stack file", false, "foo.stacks", cmd); TCLAP::ValueArg<string> inputGFF("g", "gff-file", "Input GFF file", true, "", "input_file.gff", cmd); TCLAP::ValueArg<string> inputBAM("b", "bam-file", "Input BAM file", true, "", "input_file.bam", cmd); TCLAP::ValueArg<string> outputFileArg("o", "output", "Output file", true, "", "output.coverage", cmd); cmd.parse(argc, argv); gff_file_path = inputGFF.getValue(); bam_file_path = inputBAM.getValue(); stack_file_paths = inputSTACKS.getValue(); output_file_path = outputFileArg.getValue(); } catch (TCLAP::ArgException &e) { cerr << "Error: " << e.error() << " " << e.argId() << endl; } std::ostream* output_stream; std::ofstream output_file_stream; if (output_file_path == "-") { cerr << "Outputting to standard out." << endl; output_stream = &cout; } else { output_file_stream.open(output_file_path.c_str(), std::ios::out | std::ios::trunc); if (!output_file_stream.is_open()) { cerr << "Error opening output file. Exiting." << endl; return 0; } output_stream = &output_file_stream; } BamReader reader; if(!reader.Open(bam_file_path)) { cerr << "Error opening the bam file. Exiting." << endl; return 0; } cerr << "Loading the reference GFF." << endl; // open GFF reference file std::ifstream gff_stream(gff_file_path.c_str()); if (!gff_stream.is_open()) { cerr << "Error opening reference GFF file. Exiting." << endl; return 0; } cerr << "Loading splice junctions from reference GFF." 
<< endl; UniquePositionIndex junctions; ChildrenIndex exon_index; GFFReader gff_reader(gff_stream); Feature f; while (gff_reader.read(f)) { if (f.isExonType()) { exon_index.add(f); } } vector<string> IDs; exon_index.parentIDs(IDs); for (vector<string>::iterator ID = IDs.begin(); ID != IDs.end(); ++ID) { vector<Feature> exons; vector<Feature> juncs; exon_index.childrenOf(*ID, exons); getJunctions(exons, juncs); // TODO it's wasteful to have a juncs vector that just gets moved to the index // it'd be nicer if the index implemented the same interface as the vector // this means giving indexes iterators? // c++ you're so complicated... for (vector<Feature>::iterator junc = juncs.begin(); junc != juncs.end(); ++junc) { junctions.add(*junc); } } cerr << "Loading splice junctions from stack files." << endl; // load splice junctions from stack files for (vector<string>::iterator it = stack_file_paths.begin(); it != stack_file_paths.end(); ++it) { std::ifstream stack_stream(it->c_str()); if (!stack_stream.is_open()) { cerr << "Error opening stack file: " << *it << endl; cerr << "Skipping file." << endl; } else { StackReader stack_reader(stack_stream); Feature j; while (stack_reader.read(j)) { junctions.add(j); } } } cerr << "Found " << junctions.count(); cerr << " unique splice junctions." << endl; Coverage coverage; cerr << "Reading alignments and building coverage." 
<< endl; // initialize references BamTools::RefVector ref_vec = reader.GetReferenceData(); for (int i = 0; i < ref_vec.size(); ++i) { BamTools::RefData data = ref_vec.at(i); coverage.setMinReferenceLength(data.RefName, data.RefLength); } // read and filter alignments, adding to coverages Alignment al, mate; Feature junction; while (reader.GetNextAlignment(al)) { if (al.IsPaired()) { bool valid = true; if (al.getJunction(junction)) valid = junctions.contains(junction); reader.GetNextAlignment(mate); if (mate.getJunction(junction)) valid = valid && junctions.contains(junction); if (valid) { coverage.add(al); coverage.add(mate); } } else { if (al.getJunction(junction)) coverage.add(al); else coverage.add(al); } } reader.Close(); cerr << "Writing coverage file." << endl; coverage.toOutputStream(*output_stream); return 0; }