コード例 #1
0
ファイル: index.cpp プロジェクト: AlgoLab/FastStringGraph
void indexInMemorySAIS()
{
    std::cout << "Building index for " << opt::readsFile << " in memory using SAIS\n";

	if(opt::bBuildForward || opt::bBuildReverse)
    {
		// Parse the initial read table
		ReadTable* pRT = new ReadTable(opt::readsFile);

		// Create and write the suffix array for the forward reads
		if(opt::bBuildForward)
		{
			buildIndexForTable(opt::prefix, pRT, false);
		}

		if(opt::bBuildReverse)
		{
			// Reverse all the reads
			pRT->reverseAll();

			// Build the reverse suffix array
			buildIndexForTable(opt::prefix, pRT, true);
		}

		delete pRT;
	}
}
コード例 #2
0
// Extracts junctions from all the SAM hits (based on REF_SKIPs) in the hit file
// and resets the stream when finished.
//
// hit_stream: source of per-read hit groups; reset() is called on it before returning
// it:         translates a group's insert_id into its observation order; an
//             order of 0xFFFFFFFF ends the scan
// junctions:  handed to junctions_from_alignment() for every non-contiguous hit
void get_junctions_from_hits(HitStream& hit_stream,
                             ReadTable& it,
                             JunctionSet& junctions)
{
  HitsForRead curr_hit_group;
  hit_stream.next_read_hits(curr_hit_group);

  uint32_t curr_obs_order = it.observation_order(curr_hit_group.insert_id);

  while (curr_obs_order != 0xFFFFFFFF)
    {
      // Visit every hit of the current read before moving on
      for (size_t i = 0; i < curr_hit_group.hits.size(); ++i)
        {
          BowtieHit& bh = curr_hit_group.hits[i];
          if (!bh.contiguous())
            {
              junctions_from_alignment(bh, junctions);
            }
        }

      // Advance exactly once per group. Doing this inside the hit loop
      // would replace the group while it is being indexed (skipping hits)
      // and would never advance past a group that has no hits.
      hit_stream.next_read_hits(curr_hit_group);
      curr_obs_order = it.observation_order(curr_hit_group.insert_id);
    }

  hit_stream.reset();
}
コード例 #3
0
ファイル: SuffixArray.cpp プロジェクト: avilella/sga
// Initialize a suffix array for the strings in RT
void SuffixArray::initialize(const ReadTable& rt)
{
    size_t n = rt.countSumLengths() + rt.getCount(); 
    initialize(n, rt.getCount());

    // Fill the data table with the linear ordering of the suffixes
    size_t count = 0;
    for(size_t i = 0; i < rt.getCount(); ++i)
    {
        // + 1 below is for the empty suffix (is it actually needed?)
        for(size_t j = 0; j < rt.getRead(i).seq.length() + 1; ++j)
        {
            m_data[count++] = SAElem(i, j);
        }
    }
}
コード例 #4
0
ファイル: closures.cpp プロジェクト: alexindata/tophat
// Runs two passes over paired left/right hit files and writes the candidate
// splice junctions that result.
//
// map1 / map2:  parallel vectors of left-mate and right-mate hit files; they
//               must have the same size (asserted)
// ref_stream:   handed to the CoverageMapVisitor
// juncs_file:   receives one "name<TAB>left<TAB>right<TAB>strand" line per splice
// fusions_out:  only referenced by the disabled (#if 0) fusion report below
//
// Pass 1 walks each file pair in lock-step on observation order and feeds
// every matching left/right group to the coverage visitor (and, for the first
// file pair only, to find_fusion_closure). Pass 2 rewinds the files and feeds
// the same pairs to the junction visitor. All files are closed afterwards.
//
// NOTE(review): `searched` and `closed` printed at the end are not declared
// in this function; presumably file-scope counters - confirm.
void closure_driver(vector<FZPipe>& map1,
                    vector<FZPipe>& map2,
                    ifstream& ref_stream,
                    FILE* juncs_file,
                    FILE* fusions_out)
{
  typedef RefSequenceTable::Sequence Reference;

  ReadTable it;
  RefSequenceTable rt(true);

  BowtieHitFactory hit_factory(it, rt);

  std::set<Fusion> fusions;

  fprintf (stderr, "Finding near-covered motifs...");
  CoverageMapVisitor cov_map_visitor(ref_stream, rt);
  uint32_t coverage_attempts = 0;

  assert(map1.size() == map2.size());
  for (size_t num = 0; num < map1.size(); ++num)
    {
      HitStream left_hs(map1[num].file, &hit_factory, false, true, false);
      HitStream right_hs(map2[num].file, &hit_factory, false, true, false);

      HitsForRead curr_left_hit_group;
      HitsForRead curr_right_hit_group;

      left_hs.next_read_hits(curr_left_hit_group);
      right_hs.next_read_hits(curr_right_hit_group);

      // Each cursor is derived from its own stream's group, so the
      // merge-style advance below moves the stream that is actually behind.
      uint32_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
      uint32_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);

      while (curr_left_obs_order != VMAXINT32 &&
             curr_right_obs_order != VMAXINT32)
        {
          // Left stream is behind: advance it
          while (curr_left_obs_order < curr_right_obs_order &&
                 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
            {
              left_hs.next_read_hits(curr_left_hit_group);
              curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
            }

          // Right stream is behind: advance it
          while (curr_left_obs_order > curr_right_obs_order &&
                 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
            {
              right_hs.next_read_hits(curr_right_hit_group);
              curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
            }

          // Both streams are on the same insert: process the pair
          while (curr_left_obs_order == curr_right_obs_order &&
                 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
            {
              // Fusion closures are only collected from the first file pair
              if (num == 0)
                find_fusion_closure(curr_left_hit_group, curr_right_hit_group, fusions);

              if (coverage_attempts++ % 10000 == 0)
                fprintf (stderr, "Adding covered motifs from pair %u\n", coverage_attempts);

              visit_best_pairing(curr_left_hit_group, curr_right_hit_group, cov_map_visitor);

              left_hs.next_read_hits(curr_left_hit_group);
              curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);

              right_hs.next_read_hits(curr_right_hit_group);
              curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
            }
        }
    }

  cov_map_visitor.finalize();
  fprintf (stderr, "done\n");

  ClosureJunctionSet fwd_splices;
  ClosureJunctionSet rev_splices;

  JunctionMapVisitor junc_map_visitor(fwd_splices, rev_splices, cov_map_visitor.finders);
  fprintf (stderr, "Searching for closures...");
  uint32_t closure_attempts = 0;

  for (size_t num = 0; num < map1.size(); ++num)
    {
      // Second pass over the same files: start again from the beginning
      map1[num].rewind();
      map2[num].rewind();

      HitStream left_hs = HitStream(map1[num].file, &hit_factory, false, true, false);
      HitStream right_hs = HitStream(map2[num].file, &hit_factory, false, true, false);

      HitsForRead curr_left_hit_group;
      HitsForRead curr_right_hit_group;

      left_hs.next_read_hits(curr_left_hit_group);
      right_hs.next_read_hits(curr_right_hit_group);

      // As in the first pass, each cursor comes from its own stream's group
      uint32_t curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
      uint32_t curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);

      while (curr_left_obs_order != VMAXINT32 &&
             curr_right_obs_order != VMAXINT32)
        {
          // Left stream is behind: advance it
          while (curr_left_obs_order < curr_right_obs_order &&
                 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
            {
              left_hs.next_read_hits(curr_left_hit_group);
              curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
            }

          // Right stream is behind: advance it
          while (curr_left_obs_order > curr_right_obs_order &&
                 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
            {
              right_hs.next_read_hits(curr_right_hit_group);
              curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
            }

          // Both streams are on the same insert: try to close the pair
          while (curr_left_obs_order == curr_right_obs_order &&
                 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
            {
              if (closure_attempts++ % 10000 == 0)
                fprintf (stderr, "Trying to close pair %u\n", closure_attempts);

              visit_best_pairing(curr_left_hit_group, curr_right_hit_group, junc_map_visitor);
              left_hs.next_read_hits(curr_left_hit_group);
              curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);

              right_hs.next_read_hits(curr_right_hit_group);
              curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
            }
        }
    }

  for (size_t num = 0; num < map1.size(); ++num)
    {
      map1[num].close();
      map2[num].close();
    }

  fprintf(stderr, "%lu Forward strand splices\n", fwd_splices.size());
  fprintf(stderr, "%lu Reverse strand splices\n", rev_splices.size());

  fprintf (stderr, "done\n");
  uint32_t num_potential_splices = 0;
  fprintf (stderr, "Reporting possible junctions...");

  // Forward-strand splices are written with '+', reverse-strand with '-'
  ClosureJunctionSet::iterator j_itr;
  j_itr = fwd_splices.begin();
  while (j_itr != fwd_splices.end())
    {
      fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
               rt.get_name(j_itr->refid),
               j_itr->left,j_itr->right,'+');
      ++num_potential_splices;
      ++j_itr;
    }

  j_itr = rev_splices.begin();
  while (j_itr != rev_splices.end())
    {
      fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
               rt.get_name(j_itr->refid),
               j_itr->left,j_itr->right,'-');
      ++num_potential_splices;
      ++j_itr;
    }

  //accept_all_best_hits(best_status_for_inserts);
  fprintf(stderr, "done\n");
  fprintf(stderr, "Searched for closures between %d pairs\n", searched);
  fprintf(stderr, "Successfully closed %d pairs\n", closed);

  fprintf(stderr, "Found %u total possible splices\n", num_potential_splices);

  // daehwan
#if 0
  fprintf (stderr, "Reporting potential fusions...\n");
  if(fusions_out){
    for(std::set<Fusion>::iterator itr = fusions.begin(); itr != fusions.end(); ++itr){
      const char* ref_name1 = rt.get_name(itr->refid1);
      const char* ref_name2 = rt.get_name(itr->refid2);
      
      const char* dir = "";
      if (itr->dir == FUSION_FR)
	dir = "fr";
      else if(itr->dir == FUSION_RF)
	dir = "rf";
      else
	dir = "ff";
      
      fprintf(fusions_out,
	      "%s\t%d\t%s\t%d\t%s\n",
	      ref_name1,
	      itr->left,
	      ref_name2,
	      itr->right,
	      dir);
    }
    fclose(fusions_out);
  }else{
    fprintf(stderr, "Failed to open fusions file for writing\n");
  }
#endif
}
コード例 #5
0
ファイル: overlap-long.cpp プロジェクト: Milt0n/sga
//
// Main
//
int overlapLongMain(int argc, char** argv)
{
    parseOverlapLongOptions(argc, argv);

    // Open output file
    std::ostream* pASQGWriter = createWriter(opt::outFile);

    // Build and write the ASQG header
    ASQG::HeaderRecord headerRecord;
    headerRecord.setOverlapTag(opt::minOverlap);
    headerRecord.setErrorRateTag(opt::errorRate);
    headerRecord.setInputFileTag(opt::readsFile);
    headerRecord.setTransitiveTag(true);
    headerRecord.write(*pASQGWriter);

    // Determine which index files to use. If a target file was provided,
    // use the index of the target reads
    std::string indexPrefix;
    if(!opt::targetFile.empty())
        indexPrefix = stripFilename(opt::targetFile);
    else
        indexPrefix = stripFilename(opt::readsFile);

    BWT* pBWT = new BWT(indexPrefix + BWT_EXT, opt::sampleRate);
    SampledSuffixArray* pSSA = new SampledSuffixArray(indexPrefix + SAI_EXT, SSA_FT_SAI);
    
    Timer* pTimer = new Timer(PROGRAM_IDENT);
    pBWT->printInfo();

    // Read the sequence file and write vertex records for each
    // Also store the read names in a vector of strings
    ReadTable reads;
    
    SeqReader* pReader = new SeqReader(opt::readsFile, SRF_NO_VALIDATION);
    SeqRecord record;
    while(pReader->get(record))
    {
        reads.addRead(record.toSeqItem());
        ASQG::VertexRecord vr(record.id, record.seq.toString());
        vr.write(*pASQGWriter);

        if(reads.getCount() % 100000 == 0)
            printf("Read %zu sequences\n", reads.getCount());
    }

    delete pReader;
    pReader = NULL;

    BWTIndexSet index;
    index.pBWT = pBWT;
    index.pSSA = pSSA;
    index.pReadTable = &reads;

    // Make a prefix for the temporary hits files
    size_t n_reads = reads.getCount();

    omp_set_num_threads(opt::numThreads);

#pragma omp parallel for
    for(size_t read_idx = 0; read_idx < n_reads; ++read_idx)
    {
        const SeqItem& curr_read = reads.getRead(read_idx);

        printf("read %s %zubp\n", curr_read.id.c_str(), curr_read.seq.length());
        SequenceOverlapPairVector sopv = 
            KmerOverlaps::retrieveMatches(curr_read.seq.toString(),
                                          opt::seedLength,
                                          opt::minOverlap,
                                          1 - opt::errorRate,
                                          100,
                                          index);

        printf("Found %zu matches\n", sopv.size());
        for(size_t i = 0; i < sopv.size(); ++i)
        {
            std::string match_id = reads.getRead(sopv[i].match_idx).id;

            // We only want to output each edge once so skip this overlap
            // if the matched read has a lexicographically lower ID
            if(curr_read.id > match_id)
                continue;

            std::string ao = ascii_overlap(sopv[i].sequence[0], sopv[i].sequence[1], sopv[i].overlap, 50);
            printf("\t%s\t[%d %d] ID=%s OL=%d PI:%.2lf C=%s\n", ao.c_str(),
                                                                sopv[i].overlap.match[0].start,
                                                                sopv[i].overlap.match[0].end,
                                                                match_id.c_str(),
                                                                sopv[i].overlap.getOverlapLength(),
                                                                sopv[i].overlap.getPercentIdentity(),
                                                                sopv[i].overlap.cigar.c_str());

            // Convert to ASQG
            SeqCoord sc1(sopv[i].overlap.match[0].start, sopv[i].overlap.match[0].end, sopv[i].overlap.length[0]);
            SeqCoord sc2(sopv[i].overlap.match[1].start, sopv[i].overlap.match[1].end, sopv[i].overlap.length[1]);
            
            // KmerOverlaps returns the coordinates of the overlap after flipping the reads
            // to ensure the strand matches. The ASQG file wants the coordinate of the original
            // sequencing strand. Flip here if necessary
            if(sopv[i].is_reversed)
                sc2.flip();

            // Convert the SequenceOverlap the ASQG's overlap format
            Overlap ovr(curr_read.id, sc1, match_id,  sc2, sopv[i].is_reversed, -1);

            ASQG::EdgeRecord er(ovr);
            er.setCigarTag(sopv[i].overlap.cigar);
            er.setPercentIdentityTag(sopv[i].overlap.getPercentIdentity());

#pragma omp critical
            {
                er.write(*pASQGWriter);
            }
        }
    }

    // Cleanup
    delete pReader;
    delete pBWT; 
    delete pSSA;
    
    delete pASQGWriter;
    delete pTimer;
    if(opt::numThreads > 1)
        pthread_exit(NULL);

    return 0;
}
コード例 #6
0
// Build the initial BWTs for the input file: the records are consumed in
// batches, a suffix array is constructed for each batch, and its BWT and
// index are written to temporary files. The returned vector has one
// MergeItem per batch describing its read range and file names.
MergeVector computeInitialSAIS(const BWTDiskParameters& parameters)
{
    SeqReader* pReader = new SeqReader(parameters.inFile);
    SeqRecord record;

    int batchID = 0;
    size_t readsSeen = 0;

    MergeVector batches;
    MergeItem currBatch;
    currBatch.start_index = 0;

    // Reads of the batch currently being filled
    ReadTable* pBatchReads = new ReadTable;
    for(bool exhausted = false; !exhausted; )
    {
        exhausted = !pReader->get(record);

        if(!exhausted)
        {
            // A record was read; store it, reversed if requested
            SeqItem item = record.toSeqItem();
            if(parameters.bBuildReverse)
                item.seq.reverse();
            pBatchReads->addRead(item);
            ++readsSeen;
        }

        // A batch is flushed when it is full, or when the input has ended
        // and some reads are still waiting
        const bool batchFull = pBatchReads->getCount() >= parameters.numReadsPerBatch;
        const bool finalPartialBatch = exhausted && pBatchReads->getCount() > 0;
        if(!batchFull && !finalPartialBatch)
            continue;

        // Suffix array for this batch
        SuffixArray* pSA = new SuffixArray(pBatchReads, 1);

        // Write its BWT and index to temporary files
        std::string bwtTempName = makeTempName(parameters.outPrefix, batchID, parameters.bwtExtension);
        pSA->writeBWT(bwtTempName, pBatchReads);

        std::string saiTempName = makeTempName(parameters.outPrefix, batchID, parameters.saiExtension);
        pSA->writeIndex(saiTempName);

        // Record the batch; end_index is inclusive
        currBatch.end_index = readsSeen - 1;
        currBatch.reads_filename = parameters.inFile;
        currBatch.bwt_filename = bwtTempName;
        currBatch.sai_filename = saiTempName;
        batches.push_back(currBatch);

        delete pSA;

        // Prepare for the next batch
        currBatch.start_index = readsSeen;
        ++batchID;
        pBatchReads->clear();
    }

    delete pBatchReads;
    delete pReader;
    return batches;
}
コード例 #7
0
ファイル: inserts.cpp プロジェクト: genome-vendor/tophat
void best_insert_mappings(uint64_t refid,
						  ReadTable& it,
						  /*const string& name,*/
						  HitList& hits1_in_ref,
						  HitList& hits2_in_ref,
						  BestInsertAlignmentTable& best_status_for_inserts,
						  bool prefer_shorter_pairs)
{	
	
	long chucked_for_shorter_pair = 0;
	std::set<size_t> marked;
	HitList::iterator last_good = hits2_in_ref.begin();
	
	for (size_t i = 0; i < hits1_in_ref.size(); ++i)
	{
		BowtieHit& h1 = hits1_in_ref[i];
		pair<HitList::iterator, HitList::iterator> range_pair;
		range_pair = equal_range(last_good, hits2_in_ref.end(),
								 h1, hit_insert_id_lt);
		bool found_hit = false;
		if (range_pair.first != range_pair.second)
			last_good = range_pair.first;
		
		uint32_t obs_order = it.observation_order(h1.insert_id());
		
		for (HitList::iterator f = range_pair.first;
			 f != range_pair.second;
			 ++f)
		{
			BowtieHit& h2 = *f;
			
			if (h1.insert_id() == h2.insert_id())
			{
				// max mate inner distance (genomic)
				int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev;
				if (max_mate_inner_dist == -1)
				{
					max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev;
				}
				
				InsertAlignmentGrade s(h1, h2, min_mate_inner_dist, max_mate_inner_dist);
				
				pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
					= best_status_for_inserts[obs_order];
				InsertAlignmentGrade& current = insert_best.first;
				// Is the new status better than the current best one?
				if (current < s)
				{
					insert_best.second.clear();
					current = s;
					insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
				}
				else if (!(s < current))
				{
					if (prefer_shorter_pairs && current.num_mapped == 2)
					{
						pair<int, int> dc = pair_distances(*(insert_best.second[0].left_alignment), *(insert_best.second[0].right_alignment));
						pair<int, int> ds = pair_distances(h1,h2);
						if (ds.second < dc.second)
						{
							chucked_for_shorter_pair += insert_best.second.size();
							insert_best.second.clear();
							current = s;
							insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
						}
					}
					else
					{
						insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
					}
				}
				
				marked.insert(f - hits2_in_ref.begin());
				found_hit = true;
			}
			
		}
		if (!found_hit)
		{
			pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
			= best_status_for_inserts[obs_order];
			InsertAlignmentGrade& current = insert_best.first;	
			
			InsertAlignmentGrade s(h1);
			
			if (current < s)
			{	
				insert_best.second.clear();
				current = s;
				insert_best.second.push_back(InsertAlignment(refid, &h1, NULL));
			}
			else if (! (s < current))
			{
				insert_best.second.push_back(InsertAlignment(refid, &h1, NULL));
			}
			
		}
	}
	
	for (size_t i = 0; i < hits2_in_ref.size(); ++i)
	{
		BowtieHit& h2 = hits2_in_ref[i];
		uint32_t obs_order = it.observation_order(h2.insert_id());
		pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
			= best_status_for_inserts[obs_order];
		InsertAlignmentGrade& current = insert_best.first;	
		
		InsertAlignmentGrade s(h2);
		// Did we include h2 as part of a pairing already, or is this first time
		// we've seen it?  If so, it's a singleton.
		if (marked.find(i) == marked.end())
		{
			if (current < s)
			{
				insert_best.second.clear();
				current = s;
				insert_best.second.push_back(InsertAlignment(refid, NULL, &h2));
			}
			else if (! (s < current))
			{
				insert_best.second.push_back(InsertAlignment(refid, NULL, &h2));
			}
		}
	}	
	fprintf(stderr, "Chucked %ld pairs for shorter pairing of same mates\n", chucked_for_shorter_pair);
}
コード例 #8
0
ファイル: BWTDiskConstruction.cpp プロジェクト: avilella/sga
// The algorithm is as follows. We create M BWTs for subsets of 
// the input reads. These are created independently and written
// to disk. They are then merged pairwise, round by round, until a
// single BWT/index pair remains, which is renamed to its final name.
//
// in_filename:      sequence file to index
// out_prefix:       prefix of the temporary and final output file names
// bwt_extension /
// sai_extension:    extensions of the BWT and index files
// doReverse:        reverse every sequence before indexing it
// numThreads:       passed to the SuffixArray construction and to merge()
// numReadsPerBatch: number of reads that triggers writing an initial BWT
// storageLevel:     passed through to merge()
//
// If the input yields no reads nothing is written and an error is reported
// on std::cerr. A failed rename of a final file is reported there as well.
void buildBWTDisk(const std::string& in_filename, const std::string& out_prefix, 
                  const std::string& bwt_extension, const std::string& sai_extension,
                  bool doReverse, int numThreads, int numReadsPerBatch, int storageLevel)
{
    size_t MAX_READS_PER_GROUP = numReadsPerBatch;

    SeqReader* pReader = new SeqReader(in_filename);
    SeqRecord record;

    int groupID = 0;
    size_t numReadTotal = 0;

    MergeVector mergeVector;
    MergeItem mergeItem;
    mergeItem.start_index = 0;

    // Phase 1: Compute the initial BWTs
    ReadTable* pCurrRT = new ReadTable;
    bool done = false;
    while(!done)
    {
        done = !pReader->get(record);

        if(!done)
        {
            // the read is valid
            SeqItem item = record.toSeqItem();
            if(doReverse)
                item.seq.reverse();
            pCurrRT->addRead(item);
            ++numReadTotal;
        }

        // Flush when the group is full, or at end of input if reads remain
        if(pCurrRT->getCount() >= MAX_READS_PER_GROUP || (done && pCurrRT->getCount() > 0))
        {
            // Compute the SA and BWT for this group
            SuffixArray* pSA = new SuffixArray(pCurrRT, numThreads);

            // Write the BWT to disk                
            std::string bwt_temp_filename = makeTempName(out_prefix, groupID, bwt_extension);
            pSA->writeBWT(bwt_temp_filename, pCurrRT);

            std::string sai_temp_filename = makeTempName(out_prefix, groupID, sai_extension);
            pSA->writeIndex(sai_temp_filename);

            // Push the merge info
            mergeItem.end_index = numReadTotal - 1; // inclusive
            mergeItem.reads_filename = in_filename;
            mergeItem.bwt_filename = bwt_temp_filename;
            mergeItem.sai_filename = sai_temp_filename;
            mergeVector.push_back(mergeItem);

            // Cleanup
            delete pSA;

            // Start the new group
            mergeItem.start_index = numReadTotal;
            ++groupID;
            pCurrRT->clear();
        }
    }
    delete pCurrRT;
    delete pReader;

    // No group was produced (the input had no reads): there is nothing to
    // merge or rename, and front() below must not be called on an empty vector
    if(mergeVector.empty())
    {
        std::cerr << "Error: no reads found in " << in_filename << ", no index was written\n";
        return;
    }

    // Phase 2: Pairwise merge the BWTs
    int round = 1;
    MergeVector nextMergeRound;
    while(mergeVector.size() > 1)
    {
        std::cout << "Starting round " << round << "\n";
        pReader = new SeqReader(in_filename);
        for(size_t i = 0; i < mergeVector.size(); i+=2)
        {
            if(i + 1 != mergeVector.size())
            {
                std::string bwt_merged_name = makeTempName(out_prefix, groupID, bwt_extension);
                std::string sai_merged_name = makeTempName(out_prefix, groupID, sai_extension);

                MergeItem item1 = mergeVector[i];
                MergeItem item2 = mergeVector[i+1];

                // Perform the actual merge
                int64_t curr_idx = merge(pReader, item1, item2, 
                                         bwt_merged_name, sai_merged_name, 
                                         doReverse, numThreads, storageLevel);

                // pReader now points to the end of item1's block of 
                // reads. Skip item2's reads
                assert(curr_idx == item2.start_index);
                while(curr_idx <= item2.end_index)
                {
                    bool eof = !pReader->get(record);
                    assert(!eof);
                    (void)eof;
                    ++curr_idx;
                }

                // Create the merged mergeItem to use in the next round
                MergeItem merged;
                merged.start_index = item1.start_index;
                merged.end_index = item2.end_index;
                merged.bwt_filename = bwt_merged_name;
                merged.sai_filename = sai_merged_name;
                nextMergeRound.push_back(merged);

                // Done with the temp files, remove them
                unlink(item1.bwt_filename.c_str());
                unlink(item2.bwt_filename.c_str());
                unlink(item1.sai_filename.c_str());
                unlink(item2.sai_filename.c_str());

                ++groupID;
            }
            else
            {
                // Singleton, pass through to the next round
                nextMergeRound.push_back(mergeVector[i]);
            }
        }
        delete pReader;
        mergeVector.clear();
        mergeVector.swap(nextMergeRound);
        ++round;
    }
    assert(mergeVector.size() == 1);

    // Done, rename the files to their final name
    std::stringstream bwt_ss;
    bwt_ss << out_prefix << bwt_extension << (USE_GZ ? ".gz" : "");
    std::string bwt_final_filename = bwt_ss.str();
    if(rename(mergeVector.front().bwt_filename.c_str(), bwt_final_filename.c_str()) != 0)
    {
        std::cerr << "Error: failed to rename " << mergeVector.front().bwt_filename
                  << " to " << bwt_final_filename << "\n";
    }

    std::stringstream sai_ss;
    sai_ss << out_prefix << sai_extension << (USE_GZ ? ".gz" : "");
    std::string sai_final_filename = sai_ss.str();
    if(rename(mergeVector.front().sai_filename.c_str(), sai_final_filename.c_str()) != 0)
    {
        std::cerr << "Error: failed to rename " << mergeVector.front().sai_filename
                  << " to " << sai_final_filename << "\n";
    }
}