示例#1
0
/// Reads forward in `reader` until it finds an alignment on reference `refID`
/// that is mapped, not a duplicate and has not failed QC.
///
/// @param al      receives the alignment data read from `reader`.
/// @param reader  source of alignments; advanced by every read attempt.
/// @param refID   reference ID the returned alignment must belong to.
/// @return true when `al` holds an acceptable alignment; false as soon as the
///         reader has no more data or yields an alignment whose RefID differs
///         from `refID`.
bool GetNextAlignment(BamAlignment& al, BamMultiReader& reader, int32_t refID)
{
    // Loop instead of recursing: a long run of filtered reads would otherwise
    // consume one stack frame per skipped alignment.
    while(reader.GetNextAlignmentCore(al) and al.RefID == refID)
    {
        if(al.IsMapped() and not al.IsDuplicate() and not al.IsFailedQC())
            return true;
    }
    return false;
}
示例#2
0
// merges sorted temp BAM files into single sorted output BAM file
bool SortTool::SortToolPrivate::MergeSortedRuns(void) {
  
    // open up multi reader for all of our temp files
    // this might get broken up if we do a multi-pass system later ??
    BamMultiReader multiReader;
    if ( !multiReader.Open(m_tempFilenames) ) {
        cerr << "bamtools sort ERROR: could not open BamMultiReader for merging temp files... Aborting."
             << endl;
        return false;
    }

    // set sort order for merge
    if ( m_settings->IsSortingByName )
        multiReader.SetSortOrder(BamMultiReader::SortedByReadName);
    else
        multiReader.SetSortOrder(BamMultiReader::SortedByPosition);
    
    // open writer for our completely sorted output BAM file
    BamWriter mergedWriter;
    if ( !mergedWriter.Open(m_settings->OutputBamFilename, m_headerText, m_references) ) {
        cerr << "bamtools sort ERROR: could not open " << m_settings->OutputBamFilename
             << " for writing... Aborting." << endl;
        multiReader.Close();
        return false;
    }
    
    // while data available in temp files
    BamAlignment al;
    while ( multiReader.GetNextAlignmentCore(al) )
        mergedWriter.SaveAlignment(al);
  
    // close readers
    multiReader.Close();
    mergedWriter.Close();
    
    // delete all temp files
    vector<string>::const_iterator tempIter = m_tempFilenames.begin();
    vector<string>::const_iterator tempEnd  = m_tempFilenames.end();
    for ( ; tempIter != tempEnd; ++tempIter ) {
        const string& tempFilename = (*tempIter);
        remove(tempFilename.c_str());
    }
  
    return true;
}
示例#3
0
bool StatsTool::StatsToolPrivate::Run() {
  
    // opens the BAM files without checking for indexes 
    BamMultiReader reader;
    if ( !reader.Open(settings->InputFiles, false, true) ) {
        cerr << "Could not open input BAM file(s)... quitting." << endl;
        reader.Close();
        return false;
    }
    
    // plow through file, keeping track of stats
    BamAlignment al;
    while ( reader.GetNextAlignmentCore(al) )
        ProcessAlignment(al);
    
    // print stats
    PrintStats();
    
    // clean and exit
    reader.Close();
    return true; 
}
示例#4
0
bool RandomTool::RandomToolPrivate::Run(void) {

    // set to default stdin if no input files provided
    if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
        m_settings->InputFiles.push_back(Options::StandardIn());

    // add files in the filelist to the input file list
    if ( m_settings->HasInputFilelist ) {

        ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
        if ( !filelist.is_open() ) {
            cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl;
            return false;
        }

        string line;
        while ( getline(filelist, line) )
            m_settings->InputFiles.push_back(line);
    }

    // open our reader
    BamMultiReader reader;
    if ( !reader.Open(m_settings->InputFiles) ) {
        cerr << "bamtools random ERROR: could not open input BAM file(s)... Aborting." << endl;
        return false;
    }

    // look up index files for all BAM files
    reader.LocateIndexes();

    // make sure index data is available
    if ( !reader.HasIndexes() ) {
        cerr << "bamtools random ERROR: could not load index data for all input BAM file(s)... Aborting." << endl;
        reader.Close();
        return false;
    }

    // get BamReader metadata
    const string headerText = reader.GetHeaderText();
    const RefVector references = reader.GetReferenceData();
    if ( references.empty() ) {
        cerr << "bamtools random ERROR: no reference data available... Aborting." << endl;
        reader.Close();
        return false;
    }

    // determine compression mode for BamWriter
    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
                              !m_settings->IsForceCompression );
    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
    if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;

    // open BamWriter
    BamWriter writer;
    writer.SetCompressionMode(compressionMode);
    if ( !writer.Open(m_settings->OutputFilename, headerText, references) ) {
        cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename
             << " for writing... Aborting." << endl;
        reader.Close();
        return false;
    }

    // if user specified a REGION constraint, attempt to parse REGION string
    BamRegion region;
    if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
        cerr << "bamtools random ERROR: could not parse REGION: " << m_settings->Region << endl;
        cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
             << endl;
        reader.Close();
        writer.Close();
        return false;
    }

    // seed our random number generator
    srand( time(NULL) );

    // grab random alignments
    BamAlignment al;
    unsigned int i = 0;
    while ( i < m_settings->AlignmentCount ) {

        int randomRefId    = 0;
        int randomPosition = 0;

        // use REGION constraints to select random refId & position
        if ( m_settings->HasRegion ) {

            // select a random refId
            randomRefId = getRandomInt(region.LeftRefID, region.RightRefID);

            // select a random position based on randomRefId
            const int lowerBoundPosition = ( (randomRefId == region.LeftRefID)
                                             ? region.LeftPosition
                                             : 0 );
            const int upperBoundPosition = ( (randomRefId == region.RightRefID)
                                             ? region.RightPosition
                                             : (references.at(randomRefId).RefLength - 1) );
            randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
        }

        // otherwise select from all possible random refId & position
        else {

            // select random refId
            randomRefId = getRandomInt(0, (int)references.size() - 1);

            // select random position based on randomRefId
            const int lowerBoundPosition = 0;
            const int upperBoundPosition = references.at(randomRefId).RefLength - 1;
            randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
        }

        // if jump & read successful, save first alignment that overlaps random refId & position
        if ( reader.Jump(randomRefId, randomPosition) ) {
            while ( reader.GetNextAlignmentCore(al) ) {
                if ( al.RefID == randomRefId && al.Position >= randomPosition ) {
                    writer.SaveAlignment(al);
                    ++i;
                    break;
                }
            }
        }
    }

    // cleanup & exit
    reader.Close();
    writer.Close();
    return true;
}
示例#5
0
int CountTool::Run(int argc, char* argv[]) { 

    // parse command line arguments
    Options::Parse(argc, argv, 1);

    // if no '-in' args supplied, default to stdin
    if ( !m_settings->HasInput ) 
        m_settings->InputFiles.push_back(Options::StandardIn());
    
    // open reader without index
    BamMultiReader reader;
    if (!reader.Open(m_settings->InputFiles, false, true)) {
        cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
        return 1;
    }

    // alignment counter
    BamAlignment al;
    int alignmentCount(0);
    
    // if no region specified, count entire file 
    if ( !m_settings->HasRegion ) {
        while ( reader.GetNextAlignmentCore(al) ) 
            ++alignmentCount;
    }
    
    // otherwise attempt to use region as constraint
    else {
        
        // if region string parses OK
        BamRegion region;
        if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {

            // attempt to re-open reader with index files
            reader.Close();
            bool openedOK = reader.Open(m_settings->InputFiles, true, true );
            
            // if error
            if ( !openedOK ) {
                cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
                return 1;
            }
            
            // if index data available, we can use SetRegion
            if ( reader.IsIndexLoaded() ) {
              
                // attempt to use SetRegion(), if failed report error
                if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
                    cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
                    reader.Close();
                    return 1;
                } 
              
                // everything checks out, just iterate through specified region, counting alignments
                while ( reader.GetNextAlignmentCore(al) )
                    ++alignmentCount;
            } 
            
            // no index data available, we have to iterate through until we
            // find overlapping alignments
            else {
                while( reader.GetNextAlignmentCore(al) ) {
                    if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
                          (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
                    {
                        ++alignmentCount;
                    }
                }
            }
        } 
        
        // error parsing REGION string
        else {
            cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
            cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
            reader.Close();
            return 1;
        }
    }
    
    // print results 
    cout << alignmentCount << endl;
    
    // clean & exit
    reader.Close();
    return 0;
}
示例#6
0
// Implements the "bbctools create" command.
//
// The positional arguments are either a single BBC file or one or more BAM
// files. Depending on the options, the input is streamed into any combination
// of: a new BBC file (--bbc), region coverage statistics (--covStats), summary
// statistics (--sumStats), a read-origin file (--readOrigin), text output to
// STDOUT, and BCI/CBC companion files (--index / --coarse).
//
// Returns 0 on success, -1 for invalid or unsupported option combinations, and
// 1 for failures while reading input or writing output.
//
// NOTE(review): the early error returns below do not release `regions`,
// `bbcCreate` or `readTracker`; this looks acceptable only if the caller exits
// the process on a non-zero result - confirm.
int bbctools_create( BbcUtils::OptParser &optParser ) {
    const vector<string> cmdArgs = optParser.getArgs();

	// remove .bbc extension from bbc file root, if present
	string bbcfileRoot = optParser.getOptValue( "bbc" );
	int i = bbcfileRoot.size() - 4;
	if( i > 0 && bbcfileRoot.substr(i,4) == ".bbc" ) {
		bbcfileRoot = bbcfileRoot.substr(0,i);
	}
	bool f_bci = optParser.getOptBoolean("index");
	bool f_cbc = optParser.getOptBoolean("coarse");

	string targetRegions   = optParser.getOptValue("regions");
	string annotationFields = optParser.getOptValue( "annotationFields");
	vector<string> auxRegionSplit = BbcUtils::mapToPairList(annotationFields);

	string  sumstatsFile = optParser.getOptValue("sumStats");
	string  covstatsFile = optParser.getOptValue("covStats");
	string  readOrigFile = optParser.getOptValue("readOrigin");
	string  readType     = optParser.getOptValue("readType");
	string  covDepths    = optParser.getOptValue("covDepths","-");
	double  minPcCov     = optParser.getOptNumber("minPcCov");
	// primer length and end-to-end gap defaults depend on the read type
	int32_t primerLength = optParser.getOptInteger( "primerLength", (readType == "AmpliSeq" ? 30 : 0) );
	int32_t maxE2eEndGap = optParser.getOptInteger( "e2eGap", (readType == "AmpliSeq" ? 2 : 0) );

	bool   autoCreateBamIndex = optParser.getOptBoolean("autoCreateBamIndex");
	bool     samdepth         = optParser.getOptBoolean("samdepth");
	int32_t  filterQuality    = optParser.getOptInteger("minMAPQ");
	int32_t  minAlignLength   = optParser.getOptInteger("minAlignLength");
	bool     filterDuplicates = optParser.getOptBoolean("noDups");
	bool     filterUnique     = optParser.getOptBoolean("unique");
	// alignment flag bits that cause a read to be skipped; 0x400 is added when
	// filtering duplicates (presumably SAM FLAG bits: 0x4 unmapped, 0x100
	// secondary, 0x200 QC-fail, 0x400 duplicate - see SAM specification)
	uint32_t skipFlag         = filterDuplicates ? 0x704 : 0x304;
	// --unique overrides --minMAPQ with a minimum mapping quality of 1
	uint16_t minMapQuality    = filterUnique ? 1 : filterQuality;

	bool onlyOnTargetReads = optParser.getOptBoolean("onTargetReads");
	bool onlyOnTargetBases = optParser.getOptBoolean("onTargetBases");

	// possible future options
	bool invertOnTarget = false;

	// check basic valid argument values and combinations
	int numOuts  = !bbcfileRoot.empty() + !covstatsFile.empty() + !sumstatsFile.empty() + !readOrigFile.empty();
	int numPipes = (bbcfileRoot == "-") + (covstatsFile == "-") + (sumstatsFile == "-") + (readOrigFile == "-");
	if( numOuts == 0 && !f_bci && !f_cbc ) {
		bbcfileRoot = "-";	// default if no other output specified
	} else if( numPipes > 1 ) {
		cerr << "Error: bbctools create: Only one file output (--covStats, --sumStats, --readOrigin or --bbc) may be piped to STDOUT." << endl;
		return -1;
	} else if( samdepth && numOuts ) {
		cerr << "Error: bbctools create: --samdepth (-s) option may only be used without other output options." << endl;
		return -1;
	}
	// check if single argument is a BBC file and leave open for reading if so
	BbcView bbcView;
	bool haveBbcFile = cmdArgs.size() == 1 && bbcView.Open( cmdArgs[0], true );
	bbcView.SelectPrintStream( samdepth ? "SAMDEPTH" : "BBCVIEW" );

	// check distinction between default and explicit no target regions - only for BBC input
	bool explicitNoTargetRegions = false;
	if( targetRegions == "-" ) {
		explicitNoTargetRegions = haveBbcFile;
		targetRegions = "";
	}
	if( targetRegions.empty() ) {
		if( onlyOnTargetBases && explicitNoTargetRegions && !invertOnTarget ) {
			cerr << "Warning: bbctools create --onTargetBases (-b) option with --regions '-' produces no coverage." << endl;
		} else if( onlyOnTargetReads ) {
			cerr << "Error: bbctools create --onTargetReads (-r) option requires a --regions file." << endl;
			return -1;
		}
	}
	// check for legal BBC create options
	if( f_bci || f_cbc ) {
		if( (bbcfileRoot.empty() || bbcfileRoot == "-") && !haveBbcFile ) {
			string opt = f_bci ? "--index (-i)" : "--coarse (-c)";
			cerr << "Error: bbctools create "+opt+" option requires the --bbc (-B) option or a BBC source file." << endl;
			return -1;
		}
	}
	BamMultiReader bamReader;
	if( haveBbcFile ) {
		// warn for options that do not work with BBC input
		if( filterQuality > 0 || filterDuplicates || filterUnique || minAlignLength ) {
			cerr << "Warning: SAM flag, alignment length and MAPQ filters ignored for BBC source file." << endl;
		}
		if( samdepth ) {
			cerr << "Error: --samdepth option is not supported for BBC source files." << endl;
			return -1;
		}
		if( !readOrigFile.empty() ) {
			cerr << "Error: --readOrigin option is not supported for BBC source files." << endl;
			return -1;
		}
	} else {
		// check / open for multiple BAM file inputs
		if ( !bamReader.Open(cmdArgs) ) {
			if( cmdArgs.size() == 1 ) cerr << "ERROR: Could not read input BAM file:";
			else cerr << "ERROR: Could not read all input BAM files:";
			// get and clean up bamtools error msg
			string errMsg = bamReader.GetErrorString();
			size_t i = errMsg.find_first_of('\n');
			if( i != string::npos ) errMsg = errMsg.substr(i+1);
			i = errMsg.find("::");
			if( i != string::npos ) {
				i = errMsg.find(": ");
				if( i != string::npos ) errMsg = errMsg.substr(i+1);
			}
			errMsg = BbcUtils::stringTrim(errMsg);
			// guard: the trimmed message may be empty, in which case there is
			// no first character to capitalize
			if( !errMsg.empty() ) errMsg[0] = toupper(errMsg[0]);
			cerr << endl << errMsg << "." << endl;
			return 1;
		}
	}
	// grab reference list from either input source
	const RefVector &references = haveBbcFile ? bbcView.GetReferenceData() : bamReader.GetReferenceData();
	if( !references.size() ) {
		// Issue would already been detected if input was BBC file
		cerr << "ERROR: " << (cmdArgs.size() > 1 ? "One or more " : "");
		cerr << "BAM file contains unaligned reads (no references).\n";
		return 1;
	}
	// check/set up target regions input regions/region statistics output
	// (the concrete RegionCoverage subclass is chosen by --readType)
	RegionCoverage *regions = NULL;
	string covstatsStaticFields;
	bool trackRegionBaseCov = !covDepths.empty();
	if( covstatsFile.empty() ) {
		trackRegionBaseCov = false;
		if( !annotationFields.empty() ) {
			cerr << "Warning: --annotationFields (A) option ignored without --covStats (-C) option." << endl;
		}
		if( !covDepths.empty() && covDepths != "-" ) {
			cerr << "Warning: --covDepths (-D) option ignored without --covStats (-C) option." << endl;
		}
		if( !readType.empty() ) {
			cerr << "Warning: --readType (-T) option ignored without --covStats (-C) option." << endl;
		}
		// read regions for input only and/or creating sumStats
		if( !targetRegions.empty() || explicitNoTargetRegions || !sumstatsFile.empty() ) {
			regions = new RegionCoverage(references);
		}
	} else if( readType == "trgreads" || readType == "amplicon" || readType == "AmpliSeq" ) {
		if( haveBbcFile ) {
			cerr << "Creation of read coverage requires BAM file input." << endl;
			return -1;
		}
		AmpliconRegionStatistics *ampRegionStats = new AmpliconRegionStatistics(references);
		ampRegionStats->SetGenericReads( readType == "trgreads" );
		ampRegionStats->SetSigFacCoverage( minPcCov/100 );
		ampRegionStats->SetMaxUpstreamPrimerStart( primerLength );
		ampRegionStats->SetMaxE2eEndDistance( maxE2eEndGap );
		covstatsStaticFields = "overlaps,";
		covstatsStaticFields += (minPcCov > 0) ? "fwd_cov,rev_cov" : "fwd_e2e,rev_e2e";
		covstatsStaticFields += ",total_reads,fwd_reads,rev_reads";
		regions = ampRegionStats;
	} else if( readType == "trgbases" ) {
		if( haveBbcFile && targetRegions.empty() && !explicitNoTargetRegions ) {
			cerr << "Warning: Assuming reference contigs for base coverage targets (=> option --regions -)" << endl;
		}
		RegionStatistics *regionStats = new RegionStatistics(references);
		covstatsStaticFields = "covered,uncov_5p,uncov_3p,ave_basereads,fwd_basereads,rev_basereads";
		trackRegionBaseCov = true;
		regions = regionStats;
	} else if( readType == "covdepth" || readType.empty() ) {
		// output (sorted) targets file with only covDepth stats (if any)
		regions = new RegionCoverage(references);
	} else {
		cerr << "Unknown read type '" << readType << "'" << endl;
		return -1;
	}
	// Load the input regions or default to whole reference contig targets
	if( regions ) {
		regions->SetCovAtDepths( covDepths == "-" ? "20,100,500" : covDepths );
		if( targetRegions.empty() ) {
			regions->SetWholeContigTargets();
			// set contigs as explicit regions means all reads will seen as on-target
			// for consistency these are inverted (for input from BBC)
			invertOnTarget = true;
		} else {
			string auxFieldIdx = auxRegionSplit.size() ? auxRegionSplit[0] : "";
			string errMsg = regions->Load( targetRegions, "BED", auxFieldIdx );
			if( !errMsg.empty() ) {
				cerr << "ERROR: " + errMsg + "\n";
				return 1;
			}
		}
		if( onlyOnTargetReads && haveBbcFile ) {
			cerr << "Error: bbctools create --onTargetReads option is not supported for BBC source file." << endl;
			return -1;
		}
	}
	//
	// Perform all bbctools create utilities
	//
	BbcCreate *bbcCreate = NULL;
	if( !bbcfileRoot.empty() && (bbcfileRoot != "-" || !haveBbcFile) ) {
		bbcCreate = new BbcCreate(references);
		if( bbcfileRoot != "-" && !bbcCreate->Open(bbcfileRoot+".bbc") ) {
			return 1;
		}
		bbcCreate->SetNoOffTargetPositions(onlyOnTargetBases);
	}
	bbcView.SetNoOffTargetPositions(onlyOnTargetBases);
	// Stream input to output creators
	if( haveBbcFile ) {
		// BBC reader and driver via BbcView object
		if( bbcfileRoot != "-" || !covstatsFile.empty() ) {
			// disable BbcView text stream if using for file creation
			bbcView.SelectPrintStream("NONE");
		}
		// process input BBC for just new BBC and target coverage (defer BCI/CBC)
		bbcView.SetBbcCreate(bbcCreate);
		bbcView.SetRegionCoverage(regions);
		// explicitNoTargetRegions intended for explicitly removing on-target coverage
		bbcView.SetInvertOnTarget(explicitNoTargetRegions ^ invertOnTarget);
		if( bbcCreate || regions || bbcfileRoot == "-" ) {
			bbcView.ReadAll();
		}
	} else {
		// Test read tracking option for file write
		TrackReads *readTracker = NULL;
		try {
			if( !readOrigFile.empty() )
				readTracker = new TrackReads( readOrigFile, regions );
		} catch( std::runtime_error & ) {
			cerr << "ERROR: Unable to write to read tracking file " << readOrigFile << endl;
			return 1;
		}
		// BAM reader, BaseCoverage driver, dispatching to BbcCreate and BbcView objects
		BaseCoverage baseCov(references);
		baseCov.SetRegionCoverage(regions);
		baseCov.SetBbcCreate(bbcCreate);
		baseCov.SetInvertOnTarget(invertOnTarget);
		if( bbcfileRoot == "-" ) {
			baseCov.SetBbcView(&bbcView);
		}
		// Certain options require that all reads are processed, invalidating other performance options
		bool trackAllReads = !sumstatsFile.empty() || readTracker;
		// Implicit set of onlyOnTargetReads for performance when only these reads are required
		bool useBaseCov = (bbcfileRoot == "-" || bbcCreate);
		if( !targetRegions.empty() && !trackAllReads ) {
			onlyOnTargetReads |= onlyOnTargetBases;
			if( samdepth || !useBaseCov ) onlyOnTargetReads = true;
		}
		useBaseCov |= trackRegionBaseCov;
		// do not allow jumping if sumStats option is used - need to count all reads
		bool bamReaderSetRegions = (s_useBamReaderJump && !trackAllReads);
		// current target region (contig index, start, end) used for on-target screening
		int trgContig = 0, trgSrtPos = 0, trgEndPos = 0;
		int minJumpLen = s_initialMinJumpLen;
		int maxReadLen = s_initialMaxReadLen;
		if( onlyOnTargetReads ) {
			// load/create BAM index files for targeted reading
			// Note: BamIndex::BAMTOOLS format performed very badly and cannot use mixed with BTI/BAI files
			if( bamReaderSetRegions && !bamReader.LocateIndexes() ) {
				string plural( cmdArgs.size() > 1 ? "s" : "" );
				if( autoCreateBamIndex ) {
					cerr << "Warning: Did not locate BAM index (BAI) file" << plural << ", creating bamtools version..." << endl;
					// to avoid bug use new instance of BamMultiReader
					BamMultiReader bamReader2;
					if( !bamReader2.Open(cmdArgs) || !bamReader2.CreateIndexes() ) {
						cerr << "WARNING: Failed to create BAM index file" << plural << "." << endl;
						bamReaderSetRegions = false;
					} else {
						if( cmdArgs.size() == 1 ) {
							cerr << "Successfully created BAM index file: " << BbcUtils::fileName(cmdArgs[0]) << ".bai" << endl;
						} else {
							cerr << "Successfully created BAM index files." << endl;
						}
						// re-locate indexes with first reader - could not seem to locate BTI files created!
						if( !bamReader.LocateIndexes() ) {
							cerr << "WARNING: Failed to locate BAM index file" << plural << " just created!" << endl;
							bamReaderSetRegions = false;
						}
					}
				} else {
					cerr << "Warning: BAM index file" << plural << " not located for targeted BAM access." << endl;
					bamReaderSetRegions = false;
				}
			}
			// cancel region filtering if there are no regions to iterate (unexpected)
			if( !regions->GetNextRegion( trgContig, trgSrtPos, trgEndPos ) ) {
				onlyOnTargetReads = bamReaderSetRegions = false;
			}
			if( bamReaderSetRegions ) {
				bamReader.Jump( trgContig, trgSrtPos-maxReadLen );
			}
		}
		BamAlignment aln;
		while( bamReader.GetNextAlignmentCore(aln) ) {
			// appears to be an undocumented behavior here
			if( aln.RefID < 0 ) continue;
			// skip filtered reads by flag, length or mapping quality
			if( aln.AlignmentFlag & skipFlag ) continue;
			if( aln.MapQuality < minMapQuality ) continue;
			int32_t endPos = aln.GetEndPosition();
			if( minAlignLength > 0 ) {
				if( endPos - aln.Position < minAlignLength ) continue;
			}
			// screen for on-target reads
			if( onlyOnTargetReads ) {
				// find next region overlapping or beyond of current read
				bool moreRegions = true;
				bool setRegion = false;
				while( aln.RefID > trgContig || (aln.RefID == trgContig && aln.Position > trgEndPos) ) {
					if( !regions->GetNextRegion( trgContig, trgSrtPos, trgEndPos ) ) {
						moreRegions = false;
						break;
					}
					setRegion = bamReaderSetRegions;
				}
				if( !moreRegions ) {
					// prevent further on-target checks and exit early if not using sumStats
					onlyOnTargetReads = false;
					if( trackAllReads ) {
						// force tracking of off-target reads
						regions->TrackReadsOnRegion(aln,endPos);
						if( readTracker ) readTracker->Write(aln,endPos);
						continue;
					}
					break;
				}
				if( setRegion ) {
					// track max read length for future index jumps - just in case long reads ever used
					if( endPos - aln.Position > maxReadLen ) {
						maxReadLen = endPos - aln.Position;
						if( maxReadLen > minJumpLen ) minJumpLen = maxReadLen;
					}
					// only jump when the next target is on another contig or far enough ahead
					if( aln.RefID != trgContig || trgSrtPos - aln.Position > minJumpLen ) {
						bamReader.Jump( trgContig, trgSrtPos-maxReadLen );
					}
				}
				if( aln.RefID < trgContig || endPos < trgSrtPos ) {
					// force tracking of off-target reads
					if( trackAllReads ) {
						regions->TrackReadsOnRegion(aln,endPos);
						if( readTracker ) readTracker->Write(aln,endPos);
					}
					continue;	// current is before next target region - fetch the next within bounds
				}
			}
			// record base coverage and region coverage statistics
			if( useBaseCov ) {
				endPos = baseCov.AddAlignment(aln,endPos);
				if( endPos <= 0 ) {
					if( endPos == 0 ) continue;	// read was silently ignored
					cerr << "ERROR: BAM file is not correctly sorted vs. reference." << endl;
					return 1;
				}
			}
			// record read coverage and region coverage statistics
			if( regions ) {
				regions->TrackReadsOnRegion(aln,endPos);
			}
			if( readTracker ) {
				readTracker->Write(aln,endPos);
			}
		}
		// flush and close objects associated with output
		baseCov.Flush();
		// release the read tracker (no-op when NULL) so its destructor runs;
		// it was previously never deleted
		delete readTracker;
	}
	// Output in-memory region stats file and ensure BBC file is closed
	if( regions ) {
		// build output fields title string
		string outFields = "contig_id,contig_srt,contig_end";
		// the second element is used as the annotation field titles; require
		// it to exist before indexing
		if( auxRegionSplit.size() > 1 ) outFields += "," + auxRegionSplit[1];
		if( !covstatsStaticFields.empty() ) outFields += "," + covstatsStaticFields;
		regions->Write( covstatsFile, outFields );
		if( !sumstatsFile.empty() ) {
			regions->WriteSummary( sumstatsFile, invertOnTarget );
		}
		delete regions;
	}
	delete bbcCreate;

	// Complete remaining file creation options using a BBC file input
	// NOTE: Using BbbCreate for this would require code duplication and concurrent file output streaming
	if( f_bci || f_cbc ) {
		// Check BBC file source
		if( haveBbcFile ) {
			// derive the output root from the input BBC file name
			bbcfileRoot = cmdArgs[0];
			int i = bbcfileRoot.size() - 4;
			if( i > 0 && bbcfileRoot.substr(i,4) == ".bbc" ) {
				bbcfileRoot = bbcfileRoot.substr(0,i);
			}
		} else if( !bbcView.Open( bbcfileRoot+".bbc", true ) ) {
			// message names the .bbc file that was actually opened
			cerr << "ERROR: Unexpected failure to read new BBC file '"+bbcfileRoot+".bbc'" << endl;
			return 1;
		}
		if( f_bci ) {
			BbcIndex indexer( bbcfileRoot+".bci" );
			if( !bbcView.CreateIndex(indexer) ) {
				cerr << "ERROR: Failed to create index file '" << bbcfileRoot << ".bci'" << endl;
				return 1;
			}
		}
		if( f_cbc ) {
			// CBC generation can use BCI file but is no faster since whole BBC file is read
			BbcCoarse cbcWriter( bbcfileRoot+".cbc" );
			if( !bbcView.CreateCbc(cbcWriter) ) {
				cerr << "ERROR: Failed to create coarse base coverage file '" << bbcfileRoot << ".cbc'" << endl;
				return 1;
			}
		}
	}
	return 0;
}
示例#7
0
bool MergeTool::MergeToolPrivate::Run(void) {

    // set to default input if none provided
    if ( !m_settings->HasInputBamFilename )
        m_settings->InputFiles.push_back(Options::StandardIn());

    // opens the BAM files (by default without checking for indexes)
    BamMultiReader reader;
    if ( !reader.Open(m_settings->InputFiles) ) {
        cerr << "bamtools merge ERROR: could not open input BAM file(s)... Aborting." << endl;
        return false;
    }

    // retrieve header & reference dictionary info
    std::string mergedHeader = reader.GetHeaderText();
    RefVector references = reader.GetReferenceData();

    // determine compression mode for BamWriter
    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
                               !m_settings->IsForceCompression );
    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
    if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;

    // open BamWriter
    BamWriter writer;
    writer.SetCompressionMode(compressionMode);
    if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references) ) {
        cerr << "bamtools merge ERROR: could not open "
             << m_settings->OutputFilename << " for writing." << endl;
        reader.Close();
        return false;
    }

    // if no region specified, store entire contents of file(s)
    if ( !m_settings->HasRegion ) {
        BamAlignment al;
        while ( reader.GetNextAlignmentCore(al) )
            writer.SaveAlignment(al);
    }

    // otherwise attempt to use region as constraint
    else {

        // if region string parses OK
        BamRegion region;
        if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {

            // attempt to find index files
            reader.LocateIndexes();

            // if index data available for all BAM files, we can use SetRegion
            if ( reader.HasIndexes() ) {

                // attempt to use SetRegion(), if failed report error
                if ( !reader.SetRegion(region.LeftRefID,
                                       region.LeftPosition,
                                       region.RightRefID,
                                       region.RightPosition) )
                {
                    cerr << "bamtools merge ERROR: set region failed. Check that REGION describes a valid range"
                         << endl;
                    reader.Close();
                    return false;
                }

                // everything checks out, just iterate through specified region, storing alignments
                BamAlignment al;
                while ( reader.GetNextAlignmentCore(al) )
                    writer.SaveAlignment(al);
            }

            // no index data available, we have to iterate through until we
            // find overlapping alignments
            else {
                BamAlignment al;
                while ( reader.GetNextAlignmentCore(al) ) {
                    if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
                         (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
                    {
                        writer.SaveAlignment(al);
                    }
                }
            }
        }

        // error parsing REGION string
        else {
            cerr << "bamtools merge ERROR: could not parse REGION - " << m_settings->Region << endl;
            cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
                 << endl;
            reader.Close();
            writer.Close();
            return false;
        }
    }

    // clean & exit
    reader.Close();
    writer.Close();
    return true;
}
示例#8
0
int FileReader::runInternal()
{
    ogeNameThread("am_FileReader");

    if(!format_specified)
        format = deduceFileFormat();

    if(format == FORMAT_BAM)
    {
        BamMultiReader reader;
        
        if(!reader.Open(filenames)) {
            cerr << "Error opening BAM files." << endl;
            reader.Close();
            return -1;
        }
        
        header = reader.GetHeader();
        references = reader.GetReferenceData();
        open = true;
        
        BamAlignment * al;
        
        while(true)
        {
            if(load_string_data)
                al = reader.GetNextAlignment();
            else
                al = reader.GetNextAlignmentCore();

            if(!al)
                break;
            
            putOutputAlignment(al);
        }
        
        reader.Close();
    } else if(format == FORMAT_SAM) {
        
        vector<SamReader> readers;
        
        SamHeader first_header;

        // before doing any reading, open the files to
        // verify they are the right format, etc.
        for(int i = 0; i < filenames.size(); i++) {
            SamReader reader;
            
            if(!reader.Open(filenames[i])) {
                cerr << "Error opening SAM file: " << filenames[i] << endl;
                return -1;
            }

            if(filenames.size() > 1 && i == 0)
                first_header = header;
            
            // TODO: We can probably find a better way to deal with multiple SAM file headers,
            // but for now we should disallow different headers to avoid issues.
            if(i > 0 && header.ToString() != first_header.ToString())
                cerr << "Warning! SAM input files have different headers." << endl;
            
            reader.Close();
        }

        for(int i = 0; i < filenames.size(); i++) {
            SamReader reader;
            
            if(!reader.Open(filenames[i])) {
                cerr << "Error opening SAM file: " << filenames[i] << endl;
                return -1;
            }
            
            header = reader.GetHeader();
            references = reader.GetReferenceData();
            open = true;
            
            if(filenames.size() > 1 && i == 0)
                first_header = header;

            BamAlignment * al = NULL;
            while(true)
            {
                al = reader.GetNextAlignment();
                
                if(NULL == al)
                    break;
                
                putOutputAlignment(al);
            }

            reader.Close();
        }
    } else {
        cerr << "FileReader couldn't detect file format. Aborting." << endl;
        exit(-1);
        return -1;
    }

    return 0;
}
示例#9
0
// Entry point of the merge tool: copies the alignments of all input BAM
// files into one output BAM file, optionally restricted to a REGION.
//
// argc/argv are forwarded to Options::Parse(); the parsed values are read
// back through m_settings. When no input file was given, standard input is
// used. Output written to standard out is left uncompressed unless
// compression is forced.
//
// With a REGION, the reader is re-opened so index files can be used. If an
// index is loaded the region is applied with SetRegion(); otherwise every
// alignment is read and filtered here.
//
// Returns 0 on success and 1 on any failure (a message is printed to stderr).
// Reader and writer are closed on every path taken after they were opened.
int MergeTool::Run(int argc, char* argv[]) {

    // parse command line arguments
    Options::Parse(argc, argv, 1);

    // set to default input if none provided
    if ( !m_settings->HasInputBamFilename )
        m_settings->InputFiles.push_back(Options::StandardIn());

    // opens the BAM files (by default without checking for indexes)
    BamMultiReader reader;
    if ( !reader.Open(m_settings->InputFiles, false, true) ) {
        cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
        return 1;
    }

    // retrieve header & reference dictionary info
    std::string mergedHeader = reader.GetHeaderText();
    RefVector references = reader.GetReferenceData();

    // open writer
    BamWriter writer;
    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
    if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references, writeUncompressed) ) {
        cerr << "ERROR: Could not open BAM file " << m_settings->OutputFilename << " for writing... Aborting." << endl;
        reader.Close();
        return 1;
    }

    // if no region specified, store entire contents of file(s)
    if ( !m_settings->HasRegion ) {
        BamAlignment al;
        while ( reader.GetNextAlignmentCore(al) )
            writer.SaveAlignment(al);
    }

    // otherwise attempt to use region as constraint
    else {

        // if region string parses OK
        BamRegion region;
        if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {

            // attempt to re-open reader with index files
            reader.Close();
            bool openedOK = reader.Open(m_settings->InputFiles, true, true );

            // if error; the writer is already open, so it must be closed too
            if ( !openedOK ) {
                cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
                writer.Close();
                return 1;
            }

            // if index data available, we can use SetRegion
            if ( reader.IsIndexLoaded() ) {

                // attempt to use SetRegion(), if failed report error
                if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
                    cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
                    reader.Close();
                    writer.Close();
                    return 1;
                }

                // everything checks out, just iterate through specified region, storing alignments
                BamAlignment al;
                while ( reader.GetNextAlignmentCore(al) )
                    writer.SaveAlignment(al);
            }

            // no index data available, we have to iterate through until we
            // find overlapping alignments
            else {
                BamAlignment al;
                while ( reader.GetNextAlignmentCore(al) ) {

                    // references outside [LeftRefID, RightRefID] never overlap
                    if ( al.RefID < region.LeftRefID || al.RefID > region.RightRefID )
                        continue;

                    // LeftPosition only constrains alignments on the first reference
                    // of the region; on later references every position is inside
                    if ( al.RefID == region.LeftRefID && (al.Position + al.Length) < region.LeftPosition )
                        continue;

                    // RightPosition only constrains alignments on the last reference
                    // of the region; on earlier references every position is inside
                    if ( al.RefID == region.RightRefID && al.Position > region.RightPosition )
                        continue;

                    writer.SaveAlignment(al);
                }
            }
        }

        // error parsing REGION string
        else {
            cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
            cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
            reader.Close();
            writer.Close();
            return 1;
        }
    }

    // clean & exit
    reader.Close();
    writer.Close();
    return 0;
}