Ejemplo n.º 1
0
// Aligns the reads of an already-opened read archive using a pool of worker
// threads and hands the results to an already-opened alignment writer.
//
// Everything passed in is owned by the caller: this function neither opens nor
// closes `in`/`out`, and it does not free pRefBegin, pRefEnd or pBsRefSeqs.
// The reference sequence (mReference) and the hash table (mpDNAHash) must be
// prepared by the caller as well; here they are only forwarded to the threads.
// Alignment statistics are accumulated in mStatisticsCounters and are not
// printed by this function.
//
// in         - read archive reader, forwarded to the worker threads
// out        - alignment archive writer, forwarded to the worker threads
// pRefBegin  - per-reference begin lookup table built by the caller
// pRefEnd    - per-reference end lookup table built by the caller
// pBsRefSeqs - basespace reference sequences, or NULL when the caller did not
//              load them
//
// Terminates the process with exit(1) if the unaligned read report cannot be
// opened or if a worker thread cannot be created.
void CMosaikAligner::AlignReadArchive(MosaikReadFormat::CReadReader& in, MosaikReadFormat::CAlignmentWriter& out, unsigned int* pRefBegin, unsigned int* pRefEnd, char** pBsRefSeqs) {

	// ==============
	// initialization
	// ==============

	const ReadStatus readStatus = in.GetStatus();
	const bool isPairedEnd      = (readStatus == RS_PAIRED_END_READ);

	// open the unaligned read report file
	FILE* unalignedStream = NULL;
	if(mFlags.IsReportingUnalignedReads) {
		if(fopen_s(&unalignedStream, mSettings.UnalignedReadReportFilename.c_str(), "wb") != 0) {
			cout << "ERROR: Unable to open the unaligned read FASTQ file for output." << endl;
			exit(1);
		}
	}

	// localize our read count and initialize the shared progress counter
	uint64_t numReadArchiveReads = in.GetNumReads();
	uint64_t readCounter = 0;

	// initialize our threads
	pthread_t* activeThreads = new pthread_t[mSettings.NumThreads];

	// a single ThreadData instance is shared by all worker threads; it lives on
	// this stack frame, which is safe because every thread is joined below
	CAlignmentThread::ThreadData td;
	td.Algorithm           = mAlgorithm;
	td.ReferenceLen        = mReferenceLength;
	td.Filters             = mFilters;
	td.SplitFilters        = mSplitFilters;
	td.Flags               = mFlags;
	td.Mode                = mMode;
	td.pReference          = mReference;
	td.pCounters           = &mStatisticsCounters;
	td.pDnaHash            = mpDNAHash;
	td.pIn                 = &in;
	td.pOut                = &out;
	td.pUnalignedStream    = unalignedStream;
	td.pRefBegin           = pRefBegin;
	td.pRefEnd             = pRefEnd;
	td.Settings            = mSettings;
	td.pReadCounter        = &readCounter;
	td.IsPairedEnd         = isPairedEnd;
	td.pBsRefSeqs          = pBsRefSeqs;

	pthread_attr_t attr;
	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

	// NOTE(review): these static mutexes are initialized on every call and are
	// not destroyed in this function. The low-memory driver calls this function
	// once per reference group, and POSIX leaves re-initializing an already
	// initialized mutex undefined - confirm whether they are destroyed elsewhere.
	pthread_mutex_init(&CAlignmentThread::mGetReadMutex,              NULL);
	pthread_mutex_init(&CAlignmentThread::mReportUnalignedMate1Mutex, NULL);
	pthread_mutex_init(&CAlignmentThread::mReportUnalignedMate2Mutex, NULL);
	pthread_mutex_init(&CAlignmentThread::mSaveReadMutex,             NULL);
	pthread_mutex_init(&CAlignmentThread::mStatisticsMutex,           NULL);
	pthread_mutex_init(&CAbstractDnaHash::mJumpCacheMutex,            NULL);
	pthread_mutex_init(&CAbstractDnaHash::mJumpKeyMutex,              NULL);
	pthread_mutex_init(&CAbstractDnaHash::mJumpPositionMutex,         NULL);

	// ===========================
	// start our alignment threads
	// ===========================

	// initialize our progress bar (the heading styling is skipped in low-memory mode)
	if ( !mFlags.UseLowMemory ) {
		CConsole::Heading();
		cout << endl;
	}
	cout << "Aligning read library (" << numReadArchiveReads << "):" << endl;
	if ( !mFlags.UseLowMemory ) {
		CConsole::Reset();
	}

	CProgressBar<uint64_t>::StartThread(&readCounter, 0, numReadArchiveReads, "reads");

	// create our threads; a failed creation must not go unnoticed because the
	// join loop below would otherwise operate on an uninitialized thread handle
	for(unsigned int i = 0; i < mSettings.NumThreads; i++) {
		const int createStatus = pthread_create(&activeThreads[i], &attr, CAlignmentThread::StartThread, (void*)&td);
		if(createStatus != 0) {
			cout << "ERROR: Unable to create alignment thread " << (i + 1) << " (pthread_create returned " << createStatus << ")." << endl;
			exit(1);
		}
	}

	pthread_attr_destroy(&attr);

	// the elapsed time is measured but not consumed in this function
	CBenchmark alignmentBench;
	alignmentBench.Start();

	// wait for the threads to complete
	void* status = NULL;
	for(unsigned int i = 0; i < mSettings.NumThreads; i++) {
		pthread_join(activeThreads[i], &status);
	}

	// wait for the progress bar to finish
	CProgressBar<uint64_t>::WaitThread();

	alignmentBench.Stop();

	// free the thread handles; the reference, the LUTs, the basespace sequences
	// and the reader/writer are released by the caller
	delete [] activeThreads;
	activeThreads = NULL;

	// close the unaligned read report
	if(mFlags.IsReportingUnalignedReads) fclose(unalignedStream);
}
Ejemplo n.º 2
0
// print our statistics
void CMosaikAligner::PrintStatistics () {

	MosaikReadFormat::CReadReader in;
        string inputReadArchiveFilename  = mSettings.InputReadArchiveFilename;
	in.Open(inputReadArchiveFilename);

	const uint64_t numReadArchiveReads = in.GetNumReads();
	ReadStatus readStatus        = in.GetStatus();
	const bool isPairedEnd = (readStatus == RS_PAIRED_END_READ ? true : false);
	const uint64_t totalMates = isPairedEnd ? numReadArchiveReads * 2 : numReadArchiveReads;
	
	if ( mFlags.UseLowMemory ) {
		mStatisticsCounters.ShortMates       = 0;
		mStatisticsCounters.FailedHashMates  = 0;
		mStatisticsCounters.UnalignedReads   = numReadArchiveReads - mStatisticsCounters.AlignedReads;
		mStatisticsCounters.FilteredOutMates = mStatisticsCounters.UnalignedReads;
	}

	const uint64_t totalAlignedMates = mStatisticsCounters.UniqueMates + mStatisticsCounters.NonUniqueMates;
	const uint64_t totalAlignedReads = mStatisticsCounters.AlignedReads;

	// print our alignment statistics (mates) if don't enable low-memory algorithm

	printf("\n");
	CConsole::Heading(); printf("Alignment statistics (mates):\n"); CConsole::Reset();
	printf("===================================\n");

	if(mStatisticsCounters.ShortMates > 0)
		printf("# too short:    %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.ShortMates,       (mStatisticsCounters.ShortMates       / (double)totalMates) * 100.0);

	if(mStatisticsCounters.FailedHashMates > 0)
		printf("# failed hash:  %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.FailedHashMates,  (mStatisticsCounters.FailedHashMates  / (double)totalMates) * 100.0);

	if(mStatisticsCounters.FilteredOutMates > 0)
		printf("# filtered out: %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.FilteredOutMates, (mStatisticsCounters.FilteredOutMates / (double)totalMates) * 100.0);

	if(mStatisticsCounters.UniqueMates > 0)
		printf("# unique:       %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.UniqueMates,      (mStatisticsCounters.UniqueMates      / (double)totalMates) * 100.0);

	if(mStatisticsCounters.NonUniqueMates > 0)
		printf("# non-unique:   %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.NonUniqueMates,   (mStatisticsCounters.NonUniqueMates   / (double)totalMates) * 100.0);

	printf("-----------------------------------\n");
	printf("total:          %9llu\n", (unsigned long long)totalMates);
	printf("total aligned:  ");
	CConsole::Bold(); printf("%9llu", (unsigned long long)totalAlignedMates); CConsole::Reset();
	printf(" (");
	CConsole::Bold(); printf("%5.1f %%", (totalAlignedMates / (double)totalMates) * 100.0); CConsole::Reset();
	printf(")\n");

	// print our local alignment search statistics
	// we don't print out local alignment information when the low-memory approach is enabled.
	if( !mFlags.UseLowMemory && mFlags.UseLocalAlignmentSearch ) {
		printf("\n");
		CConsole::Heading(); printf("Local alignment search statistics:\n"); CConsole::Reset();
		printf("===================================\n");

		double rescuedAlignmentsPercent = mStatisticsCounters.AdditionalLocalMates / (double)totalMates * 100.0;
		printf("rescued mates:  %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.AdditionalLocalMates, rescuedAlignmentsPercent);
	}

	// print our alignment statistics (reads)
	if(isPairedEnd) {
		printf("\n");
		CConsole::Heading(); printf("Alignment statistics (reads):\n"); CConsole::Reset();
		printf("============================================\n");
		printf("# unaligned:             %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.UnalignedReads,     (mStatisticsCounters.UnalignedReads     / (double)numReadArchiveReads) * 100.0);
		printf("# orphaned:              %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.OrphanedReads,      (mStatisticsCounters.OrphanedReads      / (double)numReadArchiveReads) * 100.0);
		printf("# both mates unique:     %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.BothUniqueReads,    (mStatisticsCounters.BothUniqueReads    / (double)numReadArchiveReads) * 100.0);
		printf("# one mate non-unique:   %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.OneNonUniqueReads,  (mStatisticsCounters.OneNonUniqueReads  / (double)numReadArchiveReads) * 100.0);
		printf("# both mates non-unique: %9llu (%5.1f %%)\n", (unsigned long long)mStatisticsCounters.BothNonUniqueReads, (mStatisticsCounters.BothNonUniqueReads / (double)numReadArchiveReads) * 100.0);
		printf("--------------------------------------------\n");
		printf("total reads:             ");
		CConsole::Bold(); printf("%9llu", (unsigned long long)numReadArchiveReads); CConsole::Reset();
		printf("\n");
		printf("total reads aligned:     ");
		CConsole::Bold(); printf("%9llu", (unsigned long long)totalAlignedReads); CConsole::Reset();
		printf(" (");
		CConsole::Bold(); printf("%5.1f %%", (totalAlignedReads / (double)numReadArchiveReads) * 100.0); CConsole::Reset();
		printf(")\n");
	}

	// print our jump cache statistics
	if( !mFlags.UseLowMemory && mFlags.IsUsingJumpDB && (mSettings.NumCachedHashes > 0)) {
		printf("\n");
		CConsole::Heading(); printf("Jump database cache statistics:\n"); CConsole::Reset();
		printf("====================================\n");

		uint64_t cacheHits = 0, cacheMisses = 0, cacheTotal = 0;
		CJumpDnaHash* pJump = (CJumpDnaHash*)mpDNAHash;
		pJump->GetCacheStatistics(cacheHits, cacheMisses);

		cacheTotal = cacheHits + cacheMisses;
		double cacheHitsPercent = cacheHits / (double)cacheTotal * 100.0;

		printf("cache hits:   %10llu (%5.1f %%)\n", (unsigned long long)cacheHits, cacheHitsPercent);
		printf("cache misses: %10llu\n", (unsigned long long)cacheMisses);
	}

	//if ( !mFlags.UseLowMemory ) {
	//	printf("\n");
	//	CConsole::Heading(); printf("Miscellaneous statistics:\n"); CConsole::Reset();
	//	printf("==================================\n");
	//	printf("aligned mate bp:        %10llu\n", (unsigned long long)mStatisticsCounters.MateBasesAligned);
	//	printf("alignment candidates/s: %10.1f\n", mStatisticsCounters.AlignmentCandidates / alignmentBench.GetElapsedWallTime());
	//}
	
	
	
}
Ejemplo n.º 3
0
void CMosaikAligner::AlignReadArchiveLowMemory(void) {

	// ==============
	// initialization
	// ==============

	// retrieve the concatenated reference sequence length
	// vector<ReferenceSequence> referenceSequences;

	MosaikReadFormat::CReferenceSequenceReader refseq;
	refseq.Open(mSettings.ReferenceFilename);
	refseq.GetReferenceSequences(referenceSequences);
	mReferenceLength = refseq.GetReferenceSequenceLength();
	const unsigned int numRefSeqs = refseq.GetNumReferenceSequences();
	refseq.Close();

	// retrieve the basespace reference filenames
	//char** pBsRefSeqs = NULL;
	if(mFlags.EnableColorspace) {
		
		MosaikReadFormat::CReferenceSequenceReader bsRefSeq;
		bsRefSeq.Open(mSettings.BasespaceReferenceFilename);

		if(!bsRefSeq.HasSameReferenceSequences(referenceSequences)) {
			printf("ERROR: The basespace and colorspace reference sequence archives do not seem to represent the same FASTA file.\n"); 
			exit(1);
		}

		bsRefSeq.Close();
	}

	// initialize our hash tables
	//InitializeHashTables(CalculateHashTableSize(mReferenceLength, mSettings.HashSize));

	// hash the concatenated reference sequence
	//if(!mFlags.IsUsingJumpDB) {
	//	InitializeHashTables(CalculateHashTableSize(mReferenceLength, mSettings.HashSize), 0, 0, 0);
	//	HashReferenceSequence(refseq);
	//}

	//cout << "- loading reference sequence... ";
	//cout.flush();
	//refseq.LoadConcatenatedSequence(mReference);
	//cout << "finished." << endl;

	// create our reference sequence LUTs
	//unsigned int* pRefBegin = new unsigned int[numRefSeqs];
	//unsigned int* pRefEnd   = new unsigned int[numRefSeqs];
	//
	//for(unsigned int j = 0; j < numRefSeqs; j++) {
	//	pRefBegin[j] = referenceSequences[j].Begin;
	//	pRefEnd[j]   = referenceSequences[j].End;
	//}

	string inputReadArchiveFilename  = mSettings.InputReadArchiveFilename;
	

	if ( !mFlags.UseLowMemory ) {
		
		// prepare BS reference sequence for SOLiD data
		char** pBsRefSeqs = NULL;
		if(mFlags.EnableColorspace) {

			cout << "- loading basespace reference sequences... ";
			cout.flush();

			MosaikReadFormat::CReferenceSequenceReader bsRefSeq;
			bsRefSeq.Open(mSettings.BasespaceReferenceFilename);


			bsRefSeq.CopyReferenceSequences(pBsRefSeqs);
			bsRefSeq.Close();

			cout << "finished." << endl;
		}

		// prepare reference sequence
		refseq.Open(mSettings.ReferenceFilename);
		cout << "- loading reference sequence... ";
		cout.flush();
		refseq.LoadConcatenatedSequence(mReference);
		cout << "finished." << endl;
		refseq.Close();
		
		unsigned int* pRefBegin = new unsigned int[numRefSeqs];
		unsigned int* pRefEnd   = new unsigned int[numRefSeqs];
		for(unsigned int j = 0; j < numRefSeqs; j++) {
			pRefBegin[j] = referenceSequences[j].Begin;
			pRefEnd[j]   = referenceSequences[j].End;
		}
		
		// initialize our hash tables
		if(!mFlags.IsUsingJumpDB) {
			InitializeHashTables(CalculateHashTableSize(mReferenceLength, mSettings.HashSize), 0, 0, 0, mFlags.UseLowMemory, 0);
			HashReferenceSequence(refseq);
		}
		else {
			InitializeHashTables(CalculateHashTableSize(mReferenceLength, mSettings.HashSize), pRefBegin[0], pRefEnd[numRefSeqs - 1], 0, mFlags.UseLowMemory, 0);
			mpDNAHash->LoadKeysNPositions();
		}

		// set the hash positions threshold
		if(mFlags.IsUsingHashPositionThreshold && (mAlgorithm == CAlignmentThread::AlignerAlgorithm_ALL))
			mpDNAHash->RandomizeAndTrimHashPositions(mSettings.HashPositionThreshold);

		// localize the read archive filenames
		string outputReadArchiveFilename = mSettings.OutputReadArchiveFilename;

		// define our read format reader and writer
		MosaikReadFormat::CReadReader in;
		in.Open(inputReadArchiveFilename);
		MosaikReadFormat::ReadGroup readGroup = in.GetReadGroup();
		ReadStatus readStatus          = in.GetStatus();
		mSettings.SequencingTechnology = readGroup.SequencingTechnology;
		mSettings.MedianFragmentLength = readGroup.MedianFragmentLength;

		vector<MosaikReadFormat::ReadGroup> readGroups;
		readGroups.push_back(readGroup);

		// set the alignment status flags
		AlignmentStatus alignmentStatus = AS_UNSORTED_READ | readStatus;
		if(mMode == CAlignmentThread::AlignerMode_ALL) alignmentStatus |= AS_ALL_MODE;
		else alignmentStatus |= AS_UNIQUE_MODE;

		MosaikReadFormat::CAlignmentWriter out;
		out.Open(mSettings.OutputReadArchiveFilename.c_str(), referenceSequences, readGroups, alignmentStatus, ALIGNER_SIGNATURE);

		AlignReadArchive(in, out, pRefBegin, pRefEnd, pBsRefSeqs);

		// close open file streams
		in.Close();

		// solid references should be one-base longer after converting back to basespace
		if(mFlags.EnableColorspace) out.AdjustSolidReferenceBases();
		out.Close();

		// free memory
		if(mFlags.IsUsingJumpDB) mpDNAHash->FreeMemory();
		if(pRefBegin)  delete [] pRefBegin;
		if(pRefEnd)    delete [] pRefEnd;
		if(mReference) delete [] mReference;
		if(pBsRefSeqs) {
			for(unsigned int i = 0; i < numRefSeqs; ++i) delete [] pBsRefSeqs[i];
			delete [] pBsRefSeqs;
		}
		pRefBegin  = NULL;
		pRefEnd    = NULL;
		mReference = NULL;
		pBsRefSeqs = NULL;
	}
	else {
		// grouping reference and store information in referenceGroups vector
		// vector< pair <unsigned int, unsigned int> > referenceGroups;
		GroupReferences();
		
		// get hash statistics for adjusting mhp for each reference group and reserve memory
		vector< unsigned int > nHashs;             // the numbers of hash positions in each reference group
		vector< unsigned int > expectedMemories;   // the numbers of hashs in each reference group
		uint64_t nTotalHash;
		GetHashStatistics( nHashs, expectedMemories, nTotalHash );
		
		// align reads again per chromosome group
		for ( unsigned int i = 0; i < referenceGroups.size(); i++) {

	        	unsigned int startRef = referenceGroups[i].first;
			unsigned int endRef   = referenceGroups[i].first + referenceGroups[i].second - 1;
			CConsole::Heading();
		        if ( referenceGroups[i].second > 1 )
				cout << endl << "Aligning chromosome " << startRef + 1 << "-" << endRef + 1 << " (of " << numRefSeqs << "):" << endl;
			else
				cout << endl << "Aligning chromosome " << startRef + 1 << " (of " << numRefSeqs << "):" << endl;
		        CConsole::Reset();

			// initialize our hash tables
			// calculate expected memories for jump data
			unsigned int expectedMemory = nHashs[i] + expectedMemories[i];
			// reserve 3% more memory for unexpected usage
			expectedMemory =  expectedMemory * 1.03;

			InitializeHashTables(0, referenceSequences[startRef].Begin, referenceSequences[endRef].End, referenceSequences[startRef].Begin, mFlags.UseLowMemory, expectedMemory);

			// set the hash positions threshold
			if(mFlags.IsUsingHashPositionThreshold && (mAlgorithm == CAlignmentThread::AlignerAlgorithm_ALL)) { 
				double ratio = nHashs[i] / (double)nTotalHash;
				unsigned int positionThreshold = ceil(ratio * (double)mSettings.HashPositionThreshold);
				//cout << positionThreshold << endl;
				mpDNAHash->RandomizeAndTrimHashPositions(positionThreshold);
			}

			// load jump data
			mpDNAHash->LoadKeysNPositions();

			// set reference information
			unsigned int* pRefBegin = new unsigned int[referenceGroups[i].second];
			unsigned int* pRefEnd   = new unsigned int[referenceGroups[i].second];
			for ( unsigned int j = 0; j < referenceGroups[i].second; j++ ){
				pRefBegin[j] = referenceSequences[startRef+j].Begin - referenceSequences[startRef].Begin;
				pRefEnd[j]   = referenceSequences[startRef+j].End   - referenceSequences[startRef].Begin;
			}

			// prepare BS reference sequence for SOLiD data
			char** pBsRefSeqs = NULL;
			if(mFlags.EnableColorspace) {
	
				cout << "- loading basespace reference sequences... ";
				cout.flush();

				MosaikReadFormat::CReferenceSequenceReader bsRefSeq;
				bsRefSeq.Open(mSettings.BasespaceReferenceFilename);


				bsRefSeq.CopyReferenceSequences(pBsRefSeqs, startRef, referenceGroups[i].second);
				bsRefSeq.Close();

				cout << "finished." << endl;
			}

			// prepare reference sequence
			refseq.Open(mSettings.ReferenceFilename);
			cout << "- loading reference sequence... ";
			cout.flush();
			//refseq.LoadConcatenatedSequence(mReference);
			refseq.LoadConcatenatedSequence(mReference, startRef, referenceGroups[i].second);
			refseq.Close();

			// trim reference sequence
			//unsigned int chrLength = referenceSequences[endRef].End - referenceSequences[startRef].Begin + 1;
			//char* chrReference  = new char[ chrLength + 1 ];
			//char* mReferencePtr = mReference + referenceSequences[startRef].Begin;
			//memcpy( chrReference, mReferencePtr, chrLength);
			//chrReference[chrLength] = 0;
			//delete [] mReference;
			//mReference = chrReference;
			cout << "finished." << endl;
			
			
			// localize the read archive filenames
			// get a temporary file name
			string tempFilename;
			CFileUtilities::GetTempFilename(tempFilename);
			outputFilenames.push_back(tempFilename);

			// define our read format reader and writer
			MosaikReadFormat::CReadReader in;
			in.Open(inputReadArchiveFilename);
			MosaikReadFormat::ReadGroup readGroup = in.GetReadGroup();
			ReadStatus readStatus          = in.GetStatus();
			mSettings.SequencingTechnology = readGroup.SequencingTechnology;
			mSettings.MedianFragmentLength = readGroup.MedianFragmentLength;

			vector<MosaikReadFormat::ReadGroup> readGroups;
			readGroups.push_back(readGroup);

			// set the alignment status flags
			AlignmentStatus alignmentStatus = AS_UNSORTED_READ | readStatus;
			if(mMode == CAlignmentThread::AlignerMode_ALL) alignmentStatus |= AS_ALL_MODE;
			else alignmentStatus |= AS_UNIQUE_MODE;

			// prepare a new vector for the current chromosome for opening out archive
			vector<ReferenceSequence> smallReferenceSequences;
			for ( unsigned int j = 0; j < referenceGroups[i].second; j++ ){
				smallReferenceSequences.push_back(referenceSequences[startRef+j]);
			}

			MosaikReadFormat::CAlignmentWriter out;
			out.Open(tempFilename.c_str(), smallReferenceSequences, readGroups, alignmentStatus, ALIGNER_SIGNATURE);
			out.AdjustPartitionSize(20000/referenceGroups.size());


			AlignReadArchive(in, out, pRefBegin, pRefEnd, pBsRefSeqs);

			// close open file streams
			in.Close();

			// solid references should be one-base longer after converting back to basespace
			if(mFlags.EnableColorspace) out.AdjustSolidReferenceBases();
			out.Close();

			// free memory
			if(mFlags.IsUsingJumpDB) mpDNAHash->FreeMemory();
			if(pRefBegin)  delete [] pRefBegin;
			if(pRefEnd)    delete [] pRefEnd;
			if(mReference) delete [] mReference;
			if(pBsRefSeqs) {
				for(unsigned int j = 0; j < referenceGroups[i].second; j++)
					delete [] pBsRefSeqs[j];
				
				delete [] pBsRefSeqs;
			}
			pRefBegin  = NULL;
			pRefEnd    = NULL;
			mReference = NULL;
			pBsRefSeqs = NULL;
		}
	}

	if ( mFlags.UseLowMemory )
		MergeArchives();

	PrintStatistics();
}