コード例 #1
0
// Aligns the query (s2, walked along the matrix rows) against the anchor
// (s1, walked along the matrix columns) using the banded Smith-Waterman-Gotoh
// forward pass, then calls Traceback() from the best-scoring cell so that
// `alignment` gets populated.
//
//   alignment - handed to Traceback()
//   s1        - column sequence, s1Length characters
//   s2        - row sequence, s2Length characters
//   hr        - hash region; hr.Begin seeds the start column and
//               hr.QueryBegin seeds the start row
//
// NOTE: the offsets and the row/column cursors are unsigned, so a value such
// as "1 - hr.QueryBegin" is stored modulo 2^N whenever hr.QueryBegin > 1.
void CBandedSmithWaterman::Align(Alignment& alignment, const char* s1, const unsigned int s1Length, const char* s2, const unsigned int s2Length, HashRegion& hr) {

	// -----------------------------------------------------------------
	// classify the hash region by whether it starts at the origin of the
	// anchor and/or the query, and derive the matching matrix offsets
	// -----------------------------------------------------------------
	unsigned int rowOffset;
	unsigned int columnOffset;
	PositionType positionType;

	const bool anchorAtOrigin = (hr.Begin == 0);
	const bool queryAtOrigin  = (hr.QueryBegin == 0);

	if(anchorAtOrigin && queryAtOrigin) {
		rowOffset    = 1;
		columnOffset = (mBandwidth / 2) + 1;
		positionType = Position_REF_AND_QUERY_ZERO;
	} else if(anchorAtOrigin) {
		rowOffset    = 1 - hr.QueryBegin;
		columnOffset = (mBandwidth / 2) + 1 + hr.QueryBegin;
		positionType = Position_REF_ZERO;
	} else if(queryAtOrigin) {
		rowOffset    = 1;
		columnOffset = (mBandwidth / 2) + 1 - hr.Begin;
		positionType = Position_QUERY_ZERO;
	} else {
		rowOffset    = 1 - hr.QueryBegin;
		columnOffset = (mBandwidth / 2) + 1 + hr.QueryBegin - hr.Begin;
		positionType = Position_REF_AND_QUERO_NONZERO;
	}

	// -----------------------------------------------------------------
	// reset the scoring matrices for this region
	// -----------------------------------------------------------------
	ReinitializeMatrices(positionType, s1Length, s2Length, hr);

	// -----------------------------------------------------------------
	// forward pass
	// -----------------------------------------------------------------

	// running optimum, maintained by UpdateBestScore()
	unsigned int peakRow    = 0;
	unsigned int peakColumn = 0;
	float peakScore         = FLOAT_NEGATIVE_INFINITY;
	float queryGapScore;

	// cursor into the (unbanded) Smith-Waterman matrix
	unsigned int row = hr.QueryBegin;
	unsigned int col = hr.Begin;

	// stage 1 - upper triangle: leading rows whose band is clipped on the
	// left by column 0; one fewer cell is clipped on every successive row
	int clippedCells = (mBandwidth / 2) - col;

	while(clippedCells > 0) {
		// every clipped row starts at the first column
		col = 0;

		// number of cells to fill in this row
		unsigned int cellsInRow = min((mBandwidth - clippedCells), (s1Length - col + 1) );
		queryGapScore = FLOAT_NEGATIVE_INFINITY;

		for(unsigned int k = 0; k < cellsInRow; ++k, ++col) {
			float cellScore = CalculateScore(s1, s2, row, col, queryGapScore, rowOffset, columnOffset);
			UpdateBestScore(peakRow, peakColumn, peakScore, row, col, cellScore);
		}

		// step back from the end of the row to the band's middle column
		col -= (mBandwidth / 2);

		--clippedCells;
		++row;
	}

	// stage 2 - rows that hold a full band of mBandwidth cells
	unsigned int fullRows = min((s1Length - col - (mBandwidth / 2)), (s2Length - row));

	for(unsigned int remaining = fullRows; remaining > 0; --remaining, ++row) {
		// move from the middle column to the left edge of the band
		col -= (mBandwidth / 2);

		queryGapScore = FLOAT_NEGATIVE_INFINITY;

		for(unsigned int k = 0; k < mBandwidth; ++k, ++col) {
			float cellScore = CalculateScore(s1, s2, row, col, queryGapScore, rowOffset, columnOffset);
			UpdateBestScore(peakRow, peakColumn, peakScore, row, col, cellScore);
		}

		// back to the middle column; with an odd mBandwidth (as the original
		// author notes) the net effect per row is a shift of one column
		col -= (mBandwidth / 2);
	}

	// stage 3 - lower triangle: trailing rows whose band is clipped on the
	// right by the end of s1
	int trailingRows = min(mBandwidth, (s2Length - row));
	col -= (mBandwidth / 2);

	unsigned int tailIndex = 0;
	while(trailingRows > 0) {

		// invalidate the band slot that falls outside the matrix in this row
		mBestScores[ mBandwidth - tailIndex ] = FLOAT_NEGATIVE_INFINITY;
		queryGapScore = FLOAT_NEGATIVE_INFINITY;

		// fill from the current column up to the end of s1
		for(unsigned int k = col; k < s1Length; ++k, ++col) {
			float cellScore = CalculateScore(s1, s2, row, col, queryGapScore, rowOffset, columnOffset);
			UpdateBestScore(peakRow, peakColumn, peakScore, row, col, cellScore);
		}

		// reposition the column cursor for the next (shorter) row
		col = col - mBandwidth + tailIndex + 2;

		++tailIndex;
		++row;
		--trailingRows;
	}

	// -----------------------------------------------------------------
	// backtrace from the best-scoring cell
	// -----------------------------------------------------------------
	Traceback(alignment, s1, s2, s2Length, peakRow, peakColumn, rowOffset, columnOffset);
}
コード例 #2
0
ファイル: BandedSmithWaterman.cpp プロジェクト: Brainiarc7/TS
// Aligns the query (s2, walked along the matrix rows) against the reference
// (s1, walked along the matrix columns) using the banded Smith-Waterman-Gotoh
// forward pass, then calls Traceback() from the best-scoring cell so that
// `referenceAl` and `cigarAl` get populated.
//
//   referenceAl - handed to Traceback()
//   cigarAl     - handed to Traceback()
//   s1          - column sequence
//   s2          - row sequence
//   hr          - hash region; hr.first.first seeds the start column and
//                 hr.second.first seeds the start row
//
// Side effect: hr is modified in place - the smaller of the two begin
// coordinates is subtracted from both, so at least one of them ends up at 0.
//
// NOTE: sequence-length / band-width validation is not performed here (the
// original checks were commented out); presumably callers guarantee sane
// input - TODO confirm.
// NOTE: the offsets and the row/column cursors are unsigned, so a value such
// as "1 - queryBegin" is stored modulo 2^N whenever queryBegin > 1.
void CBandedSmithWaterman::Align(unsigned int& referenceAl, string& cigarAl, const string& s1, const string& s2, pair< pair<unsigned int, unsigned int>, pair<unsigned int, unsigned int> >& hr) {

	// aliases for the two coordinates this routine works with
	unsigned int& anchorBegin = hr.first.first;
	unsigned int& queryBegin  = hr.second.first;

	// slide the region back along its diagonal until it touches an axis
	unsigned int rowStart = min(anchorBegin, static_cast<unsigned int>(queryBegin));
	anchorBegin -= rowStart;
	queryBegin  -= rowStart;

	// -----------------------------------------------------------------
	// classify the hash region by whether it starts at the origin of the
	// reference and/or the query, and derive the matching matrix offsets
	// -----------------------------------------------------------------
	unsigned int rowOffset;
	unsigned int columnOffset;
	PositionType positionType;

	const bool anchorAtOrigin = (anchorBegin == 0);
	const bool queryAtOrigin  = (queryBegin == 0);

	if(anchorAtOrigin && queryAtOrigin) {
		rowOffset    = 1;
		columnOffset = (mBandwidth / 2) + 1;
		positionType = Position_REF_AND_QUERY_ZERO;
	} else if(anchorAtOrigin) {
		rowOffset    = 1 - queryBegin;
		columnOffset = (mBandwidth / 2) + 1 + queryBegin;
		positionType = Position_REF_ZERO;
	} else if(queryAtOrigin) {
		rowOffset    = 1;
		columnOffset = (mBandwidth / 2) + 1 - anchorBegin;
		positionType = Position_QUERY_ZERO;
	} else {
		rowOffset    = 1 - queryBegin;
		columnOffset = (mBandwidth / 2) + 1 + queryBegin - anchorBegin;
		positionType = Position_REF_AND_QUERO_NONZERO;
	}

	// -----------------------------------------------------------------
	// reset the scoring matrices for this region
	// -----------------------------------------------------------------
	ReinitializeMatrices(positionType, s1.length(), s2.length(), hr);

	// -----------------------------------------------------------------
	// forward pass
	// -----------------------------------------------------------------

	// running optimum, maintained by UpdateBestScore()
	unsigned int peakRow    = 0;
	unsigned int peakColumn = 0;
	float peakScore         = FLOAT_NEGATIVE_INFINITY;
	float queryGapScore;

	// cursor into the (unbanded) Smith-Waterman matrix
	unsigned int row = queryBegin;
	unsigned int col = anchorBegin;

	// stage 1 - upper triangle: leading rows whose band is clipped on the
	// left by column 0; one fewer cell is clipped on every successive row
	int clippedCells = (mBandwidth / 2) - col;

	while(clippedCells > 0) {
		// every clipped row starts at the first column
		col = 0;

		// number of cells to fill in this row
		unsigned int cellsInRow = min((mBandwidth - clippedCells), (static_cast<unsigned int>(s1.length()) - col + 1) );
		queryGapScore = FLOAT_NEGATIVE_INFINITY;

		for(unsigned int k = 0; k < cellsInRow; ++k, ++col) {
			float cellScore = CalculateScore(s1, s2, row, col, queryGapScore, rowOffset, columnOffset);
			UpdateBestScore(peakRow, peakColumn, peakScore, row, col, cellScore);
		}

		// step back from the end of the row to the band's middle column
		col -= (mBandwidth / 2);

		--clippedCells;
		++row;
	}

	// stage 2 - rows that hold a full band of mBandwidth cells
	unsigned int fullRows = min((s1.length() - col - (mBandwidth / 2)), (s2.length() - row));

	for(unsigned int remaining = fullRows; remaining > 0; --remaining, ++row) {
		// move from the middle column to the left edge of the band
		col -= (mBandwidth / 2);

		queryGapScore = FLOAT_NEGATIVE_INFINITY;

		for(unsigned int k = 0; k < mBandwidth; ++k, ++col) {
			float cellScore = CalculateScore(s1, s2, row, col, queryGapScore, rowOffset, columnOffset);
			UpdateBestScore(peakRow, peakColumn, peakScore, row, col, cellScore);
		}

		// back to the middle column; with an odd mBandwidth (as the original
		// author notes) the net effect per row is a shift of one column
		col -= (mBandwidth / 2);
	}

	// stage 3 - lower triangle: trailing rows whose band is clipped on the
	// right by the end of s1
	int trailingRows = min(mBandwidth, (static_cast<unsigned int>(s2.length()) - row));
	col -= (mBandwidth / 2);

	unsigned int tailIndex = 0;
	while(trailingRows > 0) {

		// invalidate the band slot that falls outside the matrix in this row
		mBestScores[ mBandwidth - tailIndex ] = FLOAT_NEGATIVE_INFINITY;
		queryGapScore = FLOAT_NEGATIVE_INFINITY;

		// fill from the current column up to the end of s1
		for(unsigned int k = col; k < s1.length(); ++k, ++col) {
			float cellScore = CalculateScore(s1, s2, row, col, queryGapScore, rowOffset, columnOffset);
			UpdateBestScore(peakRow, peakColumn, peakScore, row, col, cellScore);
		}

		// reposition the column cursor for the next (shorter) row
		col = col - mBandwidth + tailIndex + 2;

		++tailIndex;
		++row;
		--trailingRows;
	}

	// -----------------------------------------------------------------
	// backtrace from the best-scoring cell
	// -----------------------------------------------------------------
	Traceback(referenceAl, cigarAl, s1, s2, peakRow, peakColumn, rowOffset, columnOffset);

}