Ejemplo n.º 1
0
//-------------------------------------------------------------------------
void XList::sortByElementNumber(String order){

	// Get the number of sessions per speaker
	LKVector spk(0,0);
	for(unsigned long i=0;i<_vector.size();i++){
		LKVector::type sps;
		sps.idx = i;
		sps.lk = _vector.getObject(i).getElementCount();
		spk.addValue(sps);
	}
	// Sort Xlines of the temporary XList by element number
	spk.descendingSort();

	// Copy the current RefVector<XLine> into a temporary one
	RefVector<XLine> tmpX;
	for(unsigned long i=0;i<_vector.size();i++){
		XLine *ll = new XLine(_vector.getObject(i));
		tmpX.addObject(*ll);
	}

	// Remove all elements from the XList
	_vector.deleteAllObjects();

	// Fill the XList according to the number of elements
	if(order == "descend"){
		for(unsigned long i=0;i<tmpX.size();i++){
			_vector.addObject(tmpX.getObject(spk[i].idx));
		}
	}
	else if(order == "ascend"){
		for(long i=tmpX.size()-1;i>=0;i--){
			_vector.addObject(tmpX.getObject(spk[i].idx));
		}
	}
}
Ejemplo n.º 2
0
long get_ref_lengths(int id, RefVector ref) {
	long length = 0;

	for (size_t i = 0; i < (size_t) id && i < ref.size(); i++) {
		length += (long) ref[i].RefLength + (long) Parameter::Instance()->max_dist;
	}
	return length;
}
Ejemplo n.º 3
0
GenomeFile::GenomeFile(const RefVector &genome) {
    for (size_t i = 0; i < genome.size(); ++i) {
        string chrom = genome[i].RefName;
        int length = genome[i].RefLength;
        
        _chromSizes[chrom] = length;
        _chromList.push_back(chrom);
    }
}
Ejemplo n.º 4
0
long Breakpoint::calc_pos(long pos, RefVector ref) {
	size_t i = 0;
	pos -= ref[i].RefLength;
	while (i < ref.size() && pos >= 0) {
		i++;
		pos -= ref[i].RefLength;
	}
	return pos + ref[i].RefLength;
}
Ejemplo n.º 5
0
long fuck_off(long pos, RefVector ref, std::string &chr) {
	size_t i = 0;
	pos -= (ref[i].RefLength + Parameter::Instance()->max_dist);

	while (i < ref.size() && pos >= 0) {
		i++;
		pos -= ((long) ref[i].RefLength + (long) Parameter::Instance()->max_dist);
	}
	chr = ref[i].RefName;
	return pos + ref[i].RefLength + (long) Parameter::Instance()->max_dist;
}
Ejemplo n.º 6
0
std::string Breakpoint::get_chr(long pos, RefVector ref) {
//	std::cout << "pos: " << pos << std::endl;
	size_t id = 0;
	while (id < ref.size() && pos >= 0) {
		pos -= (long) ref[id].RefLength;
		//	std::cout << id << std::endl;
		id++;
	}

	return ref[id - 1].RefName;
}
Ejemplo n.º 7
0
bool RandomTool::RandomToolPrivate::Run(void) {

    // set to default stdin if no input files provided
    if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
        m_settings->InputFiles.push_back(Options::StandardIn());

    // add files in the filelist to the input file list
    if ( m_settings->HasInputFilelist ) {

        ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
        if ( !filelist.is_open() ) {
            cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl;
            return false;
        }

        string line;
        while ( getline(filelist, line) )
            m_settings->InputFiles.push_back(line);
    }

    // open our reader
    BamMultiReader reader;
    if ( !reader.Open(m_settings->InputFiles) ) {
        cerr << "bamtools random ERROR: could not open input BAM file(s)... Aborting." << endl;
        return false;
    }

    // look up index files for all BAM files
    reader.LocateIndexes();

    // make sure index data is available
    if ( !reader.HasIndexes() ) {
        cerr << "bamtools random ERROR: could not load index data for all input BAM file(s)... Aborting." << endl;
        reader.Close();
        return false;
    }

    // get BamReader metadata
    const string headerText = reader.GetHeaderText();
    const RefVector references = reader.GetReferenceData();
    if ( references.empty() ) {
        cerr << "bamtools random ERROR: no reference data available... Aborting." << endl;
        reader.Close();
        return false;
    }

    // determine compression mode for BamWriter
    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
                              !m_settings->IsForceCompression );
    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
    if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;

    // open BamWriter
    BamWriter writer;
    writer.SetCompressionMode(compressionMode);
    if ( !writer.Open(m_settings->OutputFilename, headerText, references) ) {
        cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename
             << " for writing... Aborting." << endl;
        reader.Close();
        return false;
    }

    // if user specified a REGION constraint, attempt to parse REGION string
    BamRegion region;
    if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
        cerr << "bamtools random ERROR: could not parse REGION: " << m_settings->Region << endl;
        cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
             << endl;
        reader.Close();
        writer.Close();
        return false;
    }

    // seed our random number generator
    srand( time(NULL) );

    // grab random alignments
    BamAlignment al;
    unsigned int i = 0;
    while ( i < m_settings->AlignmentCount ) {

        int randomRefId    = 0;
        int randomPosition = 0;

        // use REGION constraints to select random refId & position
        if ( m_settings->HasRegion ) {

            // select a random refId
            randomRefId = getRandomInt(region.LeftRefID, region.RightRefID);

            // select a random position based on randomRefId
            const int lowerBoundPosition = ( (randomRefId == region.LeftRefID)
                                             ? region.LeftPosition
                                             : 0 );
            const int upperBoundPosition = ( (randomRefId == region.RightRefID)
                                             ? region.RightPosition
                                             : (references.at(randomRefId).RefLength - 1) );
            randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
        }

        // otherwise select from all possible random refId & position
        else {

            // select random refId
            randomRefId = getRandomInt(0, (int)references.size() - 1);

            // select random position based on randomRefId
            const int lowerBoundPosition = 0;
            const int upperBoundPosition = references.at(randomRefId).RefLength - 1;
            randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
        }

        // if jump & read successful, save first alignment that overlaps random refId & position
        if ( reader.Jump(randomRefId, randomPosition) ) {
            while ( reader.GetNextAlignmentCore(al) ) {
                if ( al.RefID == randomRefId && al.Position >= randomPosition ) {
                    writer.SaveAlignment(al);
                    ++i;
                    break;
                }
            }
        }
    }

    // cleanup & exit
    reader.Close();
    writer.Close();
    return true;
}
Ejemplo n.º 8
0
// this has been copied from bamtools utilities, since it isn't in the API. Original file is bamtools_utilities.cpp.
// Like the rest of Bamtools, it is under the BSD license.
bool Filter::ParseRegionString(const string& regionString, BamRegion& region)
{
    // -------------------------------
    // parse region string
    
    // check first for empty string
    if ( regionString.empty() ) 
        return false;   
    
    // non-empty string, look for a colom
    size_t foundFirstColon = regionString.find(':');
    
    // store chrom strings, and numeric positions
    string chrom;
    int startPos;
    int stopPos;
    
    // no colon found
    // going to use entire contents of requested chromosome 
    // just store entire region string as startChrom name
    // use BamReader methods to check if its valid for current BAM file
    if ( foundFirstColon == string::npos ) {
        chrom = regionString;
        startPos   = 0;
        stopPos    = -1;
    }
    
    // colon found, so we at least have some sort of startPos requested
    else {
        
        // store start chrom from beginning to first colon
        chrom = regionString.substr(0,foundFirstColon);
        
        // look for ".." after the colon
        size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
        
        // no dots found
        // so we have a startPos but no range
        // store contents before colon as startChrom, after as startPos
        if ( foundRangeDots == string::npos ) {
            startPos   = atoi( regionString.substr(foundFirstColon+1).c_str() ); 
            stopPos    = -1;
        } 
        
        // ".." found, so we have some sort of range selected
        else {
            
            // store startPos between first colon and range dots ".."
            startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
            
            // look for second colon
            size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
            
            // no second colon found
            // so we have a "standard" chrom:start..stop input format (on single chrom)
            if ( foundSecondColon == string::npos ) {
                stopPos    = atoi( regionString.substr(foundRangeDots+2).c_str() );
            } else {
                return false;
            }
        }
    }
    
    // -------------------------------
    // validate reference IDs & genomic positions
    
    const RefVector references = getReferences();
    
    int RefID = -1;
    for(int i = 0; i < references.size(); i++) {
        if(references[i].RefName == chrom)
            RefID = i;
    }
    
    // if startRefID not found, return false
    if ( RefID == -1 ) {
        cerr << "Can't find chromosome'" << chrom << "'" << endl;
        return false;
    }
    
    // startPos cannot be greater than or equal to reference length
    const RefData& startReference = references.at(RefID);
    if ( startPos >= startReference.RefLength ) {
        cerr << "Start position (" << startPos << ") after end of the reference sequence (" << startReference.RefLength << ")" << endl;
        return false;
    }
    
    // stopPosition cannot be larger than reference length
    const RefData& stopReference = references.at(RefID);
    if ( stopPos > stopReference.RefLength ) {
        cerr << "Start position (" << stopPos << ") after end of the reference sequence (" << stopReference.RefLength << ")" << endl;
        return false;
    }

    // if no stopPosition specified, set to reference end
    if ( stopPos == -1 ) stopPos = stopReference.RefLength;
    
    // -------------------------------
    // set up Region struct & return
    
    region.LeftRefID     = RefID;
    region.LeftPosition  = startPos;
    region.RightRefID    = RefID;;
    region.RightPosition = stopPos;
    return true;
}
Ejemplo n.º 9
0
 bool check(const PropertyFilter& filter, const BamAlignment& al) {
   
     bool keepAlignment = true;
     const PropertyMap& properties = filter.Properties;
     PropertyMap::const_iterator propertyIter = properties.begin();
     PropertyMap::const_iterator propertyEnd  = properties.end();
     for ( ; propertyIter != propertyEnd; ++propertyIter ) {
       
         // check alignment data field depending on propertyName
         const string& propertyName = (*propertyIter).first;
         const PropertyFilterValue& valueFilter = (*propertyIter).second;
         
         if      ( propertyName == ALIGNMENTFLAG_PROPERTY )  keepAlignment &= valueFilter.check(al.AlignmentFlag);
         else if ( propertyName == CIGAR_PROPERTY ) {
             stringstream cigarSs;
             const vector<CigarOp>& cigarData = al.CigarData;
             if ( !cigarData.empty() ) {
                 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
                 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
                 vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
                 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
                     const CigarOp& op = (*cigarIter);
                     cigarSs << op.Length << op.Type;
                 }
                 keepAlignment &= valueFilter.check(cigarSs.str());
             }
         }
         else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= valueFilter.check(al.InsertSize);
         else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsDuplicate());
         else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= valueFilter.check(al.IsFailedQC());
         else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsFirstMate());
         else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsMapped());
         else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= valueFilter.check(al.IsMateMapped());
         else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
         else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsPaired());
         else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
         else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
         else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
         else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
         else if ( propertyName == ISSINGLETON_PROPERTY ) {
             const bool isSingleton = al.IsPaired() && al.IsMapped() && !al.IsMateMapped();
             keepAlignment &= valueFilter.check(isSingleton);
         }
         else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
         else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
         else if ( propertyName == MATEREFERENCE_PROPERTY ) {
             if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
             BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
             const string& refName = filterToolReferences.at(al.MateRefID).RefName;
             keepAlignment &= valueFilter.check(refName);
         }
         else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= valueFilter.check(al.Name);
         else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= valueFilter.check(al.Position);
         else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= valueFilter.check(al.QueryBases);
         else if ( propertyName == REFERENCE_PROPERTY ) {
             BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
             const string& refName = filterToolReferences.at(al.RefID).RefName;
             keepAlignment &= valueFilter.check(refName);
         }
         else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
         else BAMTOOLS_ASSERT_UNREACHABLE;
         
         // if alignment fails at ANY point, just quit and return false
         if ( !keepAlignment ) return false;
     }
   
     BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
     return keepAlignment;
 }
Ejemplo n.º 10
0
// ValidateReaders checks that all the readers point to BAM files representing
// alignments against the same set of reference sequences, and that the
// sequences are identically ordered.  If these checks fail the operation of
// the multireader is undefined, so we force program exit.
bool BamMultiReaderPrivate::ValidateReaders() const
{

    m_errorString.clear();

    // skip if 0 or 1 readers opened
    if (m_readers.empty() || (m_readers.size() == 1)) return true;

    // retrieve first reader
    const MergeItem& firstItem = m_readers.front();
    const BamReader* firstReader = firstItem.Reader;
    if (firstReader == 0) return false;

    // retrieve first reader's header data
    const SamHeader& firstReaderHeader = firstReader->GetHeader();
    const std::string& firstReaderSortOrder = firstReaderHeader.SortOrder;

    // retrieve first reader's reference data
    const RefVector& firstReaderRefData = firstReader->GetReferenceData();
    const int firstReaderRefCount = firstReader->GetReferenceCount();
    const int firstReaderRefSize = firstReaderRefData.size();

    // iterate over all readers
    std::vector<MergeItem>::const_iterator readerIter = m_readers.begin();
    std::vector<MergeItem>::const_iterator readerEnd = m_readers.end();
    for (; readerIter != readerEnd; ++readerIter) {
        const MergeItem& item = (*readerIter);
        BamReader* reader = item.Reader;
        if (reader == 0) continue;

        // get current reader's header data
        const SamHeader& currentReaderHeader = reader->GetHeader();
        const std::string& currentReaderSortOrder = currentReaderHeader.SortOrder;

        // check compatible sort order
        if (currentReaderSortOrder != firstReaderSortOrder) {
            const std::string message =
                std::string("mismatched sort order in ") + reader->GetFilename() + ", expected " +
                firstReaderSortOrder + ", but found " + currentReaderSortOrder;
            SetErrorString("BamMultiReader::ValidateReaders", message);
            return false;
        }

        // get current reader's reference data
        const RefVector currentReaderRefData = reader->GetReferenceData();
        const int currentReaderRefCount = reader->GetReferenceCount();
        const int currentReaderRefSize = currentReaderRefData.size();

        // init reference data iterators
        RefVector::const_iterator firstRefIter = firstReaderRefData.begin();
        RefVector::const_iterator firstRefEnd = firstReaderRefData.end();
        RefVector::const_iterator currentRefIter = currentReaderRefData.begin();

        // compare reference counts from BamReader ( & container size, in case of BR error)
        if ((currentReaderRefCount != firstReaderRefCount) ||
            (firstReaderRefSize != currentReaderRefSize)) {
            std::stringstream s;
            s << "mismatched reference count in " << reader->GetFilename() << ", expected "
              << firstReaderRefCount << ", but found " << currentReaderRefCount;
            SetErrorString("BamMultiReader::ValidateReaders", s.str());
            return false;
        }

        // this will be ok; we just checked above that we have identically-sized sets of references
        // here we simply check if they are all, in fact, equal in content
        while (firstRefIter != firstRefEnd) {
            const RefData& firstRef = (*firstRefIter);
            const RefData& currentRef = (*currentRefIter);

            // compare reference name & length
            if ((firstRef.RefName != currentRef.RefName) ||
                (firstRef.RefLength != currentRef.RefLength)) {
                std::stringstream s;
                s << "mismatched references found in" << reader->GetFilename()
                  << "expected: " << std::endl;

                // print first reader's reference data
                RefVector::const_iterator refIter = firstReaderRefData.begin();
                RefVector::const_iterator refEnd = firstReaderRefData.end();
                for (; refIter != refEnd; ++refIter) {
                    const RefData& entry = (*refIter);
                    std::stringstream s;
                    s << entry.RefName << ' ' << std::endl;
                }

                s << "but found: " << std::endl;

                // print current reader's reference data
                refIter = currentReaderRefData.begin();
                refEnd = currentReaderRefData.end();
                for (; refIter != refEnd; ++refIter) {
                    const RefData& entry = (*refIter);
                    s << entry.RefName << ' ' << entry.RefLength << std::endl;
                }

                SetErrorString("BamMultiReader::ValidateReaders", s.str());
                return false;
            }

            // update iterators
            ++firstRefIter;
            ++currentRefIter;
        }
    }

    // if we get here, everything checks out
    return true;
}