int main (int argc, char * argv[]) { vector<string> inputFilenames; string combinedOutFilename, alignmentsOutFilename; try { TCLAP::CmdLine cmd("Program description", ' ', VERSION); TCLAP::ValueArg<string> combinedOutputArg("o", "out", "Combined output filename (BAM format)", true, "", "combined.bam", cmd); TCLAP::ValueArg<int> minInsertArg("n", "min-insert", "Minimum insert size", false, DEFAULT_MIN_GAP, "min insert size", cmd); TCLAP::ValueArg<int> maxInsertArg("x", "max-insert", "Maximum insert size", false, DEFAULT_MAX_GAP, "max insert size", cmd); TCLAP::MultiArg<string> inputArgs("b", "bam", "Input BAM file", true, "input.bam", cmd); cmd.parse(argc, argv); combinedOutFilename = combinedOutputArg.getValue(); MIN_GAP = minInsertArg.getValue(); MAX_GAP = maxInsertArg.getValue(); inputFilenames = inputArgs.getValue(); } catch (TCLAP::ArgException &e) { cerr << "Error: " << e.error() << " " << e.argId() << endl; } // TODO require that alignments are sorted by name BamMultiReader reader; reader.Open(inputFilenames); if (!ValidOut.Open(combinedOutFilename, reader.GetHeader(), reader.GetReferenceData())) { cerr << ValidOut.GetErrorString() << endl; return 1; } string current, prev; char mateID; Group group; set<string> references; Alignment a; while (reader.GetNextAlignment(a)) { parseID(a.Name, current, mateID); if (current.compare(prev) && prev.size() > 0) { processGroup(group, references); group.clear(); references.clear(); } references.insert(a.RefName); GroupKey key; key.refID = a.RefName; key.mateID = mateID; key.rev = a.IsReverseStrand(); group.insert( std::make_pair( key, a ) ); prev = current; } processGroup(group, references); }
int FileReader::runInternal() { ogeNameThread("am_FileReader"); if(!format_specified) format = deduceFileFormat(); if(format == FORMAT_BAM) { BamMultiReader reader; if(!reader.Open(filenames)) { cerr << "Error opening BAM files." << endl; reader.Close(); return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; BamAlignment * al; while(true) { if(load_string_data) al = reader.GetNextAlignment(); else al = reader.GetNextAlignmentCore(); if(!al) break; putOutputAlignment(al); } reader.Close(); } else if(format == FORMAT_SAM) { vector<SamReader> readers; SamHeader first_header; // before doing any reading, open the files to // verify they are the right format, etc. for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } if(filenames.size() > 1 && i == 0) first_header = header; // TODO: We can probably find a better way to deal with multiple SAM file headers, // but for now we should disallow different headers to avoid issues. if(i > 0 && header.ToString() != first_header.ToString()) cerr << "Warning! SAM input files have different headers." << endl; reader.Close(); } for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; if(filenames.size() > 1 && i == 0) first_header = header; BamAlignment * al = NULL; while(true) { al = reader.GetNextAlignment(); if(NULL == al) break; putOutputAlignment(al); } reader.Close(); } } else { cerr << "FileReader couldn't detect file format. Aborting." << endl; exit(-1); return -1; } return 0; }