// Prepares a merger over a set of alignment archives.
//
// The constructor only gathers metadata; no alignments are merged here:
//   - read groups and the alignment status are taken from the FIRST archive,
//   - reference sequences are collected from EVERY archive, in input order,
//   - the two output headers (_sHeader, _rHeader) are filled in,
//   - the mapping-quality calculator is opened on the two annotation files.
//
// Parameters:
//   inputFilenames             archives to merge; when hasSpecial is set, the
//                              last entry is the archive holding the special
//                              references
//   outputFilename             stored in _outputFilename
//   readNo                     stored in _readNo (not dereferenced here)
//   isSolid                    stored in _isSolid
//   commandLine                recorded as the CL field of both program groups
//   paired_end_ann_file,
//   single_end_ann_file        handed to _mqCalculator.Open()
//   fragmentLength             stored in _expectedFragmentLength
//   localAlignmentSearchRadius stored in _localAlignmentSearchRadius
//   hasSpecial                 whether the last archive carries special references
//   statMappingQuality         stored in _statMappingQuality
CArchiveMerge::CArchiveMerge (
	const vector <string>& inputFilenames,
	const string& outputFilename,
	uint64_t *readNo,
	const bool& isSolid,
	const string& commandLine,
	const string& paired_end_ann_file,
	const string& single_end_ann_file,
	const unsigned int& fragmentLength,
	const unsigned int& localAlignmentSearchRadius,
	const bool& hasSpecial,
	const unsigned char& statMappingQuality)
	: _inputFilenames(inputFilenames)
	, _outputFilename(outputFilename)
	, _readNo (readNo)
	, _isSolid (isSolid)
	, _expectedFragmentLength (fragmentLength)
	, _localAlignmentSearchRadius(localAlignmentSearchRadius)
	, _refIndex()
	, _referenceSequences()
	, _referenceSequencesWoSpecial()
	, _readGroups()
	, _alignmentStatus(0)
	, _isPairedEnd(false)
	, _hasSpecial (hasSpecial)
	, _statMappingQuality (statMappingQuality)
	, _sequencingTechnologies()
	, _readGroupsMap()
	, _specialArchiveName()
	, _specialCode1()
	, _specialCode2()
	, _specialReader()
	, _specialAl()
	, _special_owner(0)
	, _specialArchiveEmpty(true)
	, _specialReferenceSequences()
	, _counters()
	, _statisticsMaps()
	, _sHeader()
	, _rHeader()
	, _sBam()
	, _rBam()
	, za1()
	, za2()
	, _entropy()
	, _mqCalculator()
{
	//_statisticsMaps = mStatisticsMaps;

	MosaikReadFormat::CAlignmentReader reader;

	// Read groups and alignment status come from the first archive only.
	// With no input archives there is nothing to query, so the defaults from
	// the initializer list (no read groups, status 0) are kept instead of
	// indexing past the end of the vector.
	if ( !_inputFilenames.empty() ) {
		reader.Open( _inputFilenames[0] );
		reader.GetReadGroups(_readGroups);
		_alignmentStatus = reader.GetStatus();
		reader.Close();
	}

	_isPairedEnd = ( ( _alignmentStatus & AS_PAIRED_END_READ ) != 0 );

	_refIndex.resize(_inputFilenames.size(), 0);

	// assign each read group its code and index the groups by that code
	for ( vector<MosaikReadFormat::ReadGroup>::iterator rgIter = _readGroups.begin(); rgIter != _readGroups.end(); ++rgIter ) {
		rgIter->ReadGroupCode = rgIter->GetCode( *rgIter );
		_readGroupsMap[ rgIter->ReadGroupCode ] = *rgIter;
	}

	// The sequencing technology is taken from the first read group; an archive
	// without read groups leaves the default-constructed value in place.
	if ( !_readGroups.empty() )
		_sequencingTechnologies = _readGroups[0].SequencingTechnology;

	// the last archive is the one containing alignments located at special references
	for ( unsigned int i = 0; i < _inputFilenames.size(); i++ ) {
		reader.Open( _inputFilenames[i] );

		// grab reference info from the archive
		vector<ReferenceSequence> referenceSequences;
		reader.GetReferenceSequences(referenceSequences);
		CopyReferenceString( referenceSequences );

		_referenceSequences.insert( _referenceSequences.end(), referenceSequences.begin(), referenceSequences.end() );

		// _refIndex[i] is the cumulative number of references in archives 0..i
		_refIndex[i] = ( i == 0 ) ? referenceSequences.size() : referenceSequences.size() + _refIndex[i-1];

		if ( _hasSpecial ) {
			const bool isLastArchive = ( i == ( _inputFilenames.size() - 1 ) );
			if ( isLastArchive ) {
				// includes the special references
				_specialReferenceSequences.insert( _specialReferenceSequences.end(), referenceSequences.begin(), referenceSequences.end() );
			} else {
				// don't include the special references
				_referenceSequencesWoSpecial.insert( _referenceSequencesWoSpecial.end(), referenceSequences.begin(), referenceSequences.end() );
			}
		}

		reader.Close();
	}

	// without a special archive both reference lists are the same
	if ( !_hasSpecial )
		_referenceSequencesWoSpecial = _referenceSequences;

	/*
	if (_hasSpecial) {
		for (unsigned int i = 0; i < _specialReferenceSequences.size(); ++i) {
			printf("%s\n",_specialReferenceSequences[i].Species.c_str());
		}
	}
	*/

	// _sHeader advertises every reference (special ones included);
	// _rHeader advertises only the non-special references.
	_sHeader.SortOrder = SORTORDER_UNSORTED;
	//_uHeader.SortOrder = SORTORDER_UNSORTED;
	_rHeader.SortOrder = SORTORDER_UNSORTED;

	_sHeader.pReferenceSequences = &_referenceSequences;
	//_uHeader.pReferenceSequences = &_referenceSequencesWoSpecial;
	_rHeader.pReferenceSequences = &_referenceSequencesWoSpecial;

	_sHeader.pReadGroups = &_readGroups;
	//_uHeader.pReadGroups = &_readGroups;
	_rHeader.pReadGroups = &_readGroups;

	// program group: "<major>.<minor>.<build>" version plus the command line
	stringstream ss;
	ss << static_cast<int>(MOSAIK_MAJOR_VERSION) << "."
	   << static_cast<int>(MOSAIK_MINOR_VERSION) << "."
	   << static_cast<int>(MOSAIK_BUILD_VERSION);
	const string versionString = ss.str();

	_sHeader.pg.ID = "MosaikAligner";
	_rHeader.pg.ID = "MosaikAligner";
	_sHeader.pg.VN = versionString;
	_rHeader.pg.VN = versionString;
	_sHeader.pg.CL = commandLine;
	_rHeader.pg.CL = commandLine;

	_mqCalculator.Open(paired_end_ann_file, single_end_ann_file);
}
void CMosaikAligner::MergeArchives(void) { // set active threads unsigned int nThread = ( mSettings.NumThreads < outputFilenames.size() ) ? mSettings.NumThreads : outputFilenames.size(); vector< string > temporaryFiles; temporaryFiles.resize( outputFilenames.size() ); for ( unsigned int i = 0; i < outputFilenames.size(); i++ ) { string tempFilename; CFileUtilities::GetTempFilename(tempFilename); temporaryFiles[i] = tempFilename; } // calculate total # of reads unsigned int nReads = 0; for ( unsigned int i = 0 ; i < outputFilenames.size(); i++ ) { MosaikReadFormat::CAlignmentReader reader; reader.Open( outputFilenames[i] ); nReads += reader.GetNumReads(); reader.Close(); } // if nThread is too large, it'll open too many files at the same time. // Then, we'll get an error since system doesn't allow us to open any file. if ( nThread > 7 ) nThread = 7; // prepare reference offset vector for SOLiD //vector<unsigned int> refOffsets; //refOffsets.resize(referenceGroups.size()); //for ( unsigned int i = 0; i < referenceGroups.size(); i++ ) { // unsigned int startRef = referenceGroups[i].first; // refOffsets[i] = referenceSequences[startRef].Begin; //} CConsole::Heading(); cout << endl << "Sorting alignment archive:" << endl; CConsole::Reset(); SortThread sThread ( outputFilenames, temporaryFiles, nThread, nReads, mSettings.MedianFragmentLength ); sThread.Start(); CConsole::Heading(); cout << "Merging alignment archive:" << endl; CConsole::Reset(); unsigned int readNo = 0; unsigned int nMaxAlignment = 1000; CProgressBar<unsigned int>::StartThread(&readNo, 0, nReads, "reads"); CArchiveMerge merger( temporaryFiles, mSettings.OutputReadArchiveFilename, nMaxAlignment, &readNo ); merger.Merge(); CProgressBar<unsigned int>::WaitThread(); for ( unsigned int i = 0; i < outputFilenames.size(); i++ ) rm(outputFilenames[i].c_str()); for ( unsigned int i = 0; i < temporaryFiles.size(); i++ ) rm(temporaryFiles[i].c_str()); // get statistics information CArchiveMerge::StatisticsCounters 
mergeCounters; merger.GetStatisticsCounters( mergeCounters ); mStatisticsCounters.AlignedReads = mergeCounters.AlignedReads; mStatisticsCounters.BothNonUniqueReads = mergeCounters.BothNonUniqueReads; mStatisticsCounters.BothUniqueReads = mergeCounters.BothUniqueReads; mStatisticsCounters.OneNonUniqueReads = mergeCounters.OneNonUniqueReads; mStatisticsCounters.OrphanedReads = mergeCounters.OrphanedReads; mStatisticsCounters.FilteredOutMates = mergeCounters.FilteredOutMates; mStatisticsCounters.NonUniqueMates = mergeCounters.NonUniqueMates; mStatisticsCounters.UniqueMates = mergeCounters.UniqueMates; }