Пример #1
0
int main(int argc, char* argv[]) {
	if (argc < 3) {
		cout << "usage: samatcher queryfile targetfile" <<endl;
		exit(1);
	}

	string queryFileName, targetFileName;
	int minMatchLength = 5;
	int maxExpand      = 0;
	queryFileName  = argv[1];
	targetFileName = argv[2];
	int argi = 3;
	AnchorParameters anchorParams;
	
	while (argi < argc) {
		if (strcmp(argv[argi], "-minmatch") == 0) {
			anchorParams.minMatchLength = atoi(argv[++argi]);
		}
		else if (strcmp(argv[argi], "-maxexpand") == 0) {
			anchorParams.expand = atoi(argv[++argi]);
		}
		else {
			cout << "ERROR! Invalid argument: " << argv[argi]<< endl;
			exit(1);
		}
		++argi;
	}

	FASTQSequence query, target;
	FASTAReader queryReader, targetReader;
	queryReader.Init(queryFileName);
	targetReader.Init(targetFileName);


	while(1) {
		if (!queryReader.GetNext(query)) break;
		if (!targetReader.GetNext(target)) break;

		query.ToUpper();
		target.ToUpper();

		//
		// Build the suffix array on the target.
		//
		DNASuffixArray sarray;
		target.ToThreeBit();		
		vector<int> alphabet;
		sarray.InitThreeBitDNAAlphabet(alphabet);
		sarray.LarssonBuildSuffixArray(target.seq, target.length, alphabet);
		cout <<"done building suffix array." << endl;
		target.ToAscii();

		//
		// Find the list of anchors.
		//
	
		query.PrintSeq(cout);
		cout << "target: " << endl;
		target.PrintSeq(cout);
		MatchPosList matchPosList;
		int numKeysMatched;
		anchorParams.useLookupTable = false;
		numKeysMatched   = 
			MapReadToGenome(target, sarray, query, sarray.lookupPrefixLength,
											matchPosList, anchorParams);
	
		//
		// Now, convert the matchPosList to a set of fragments
		// that can be used in the sdp.
		//
		SortMatchPosList(matchPosList);
		vector<ChainedFragment> fragments;
		fragments.resize(matchPosList.size());
		VectorIndex i;
		for (i = 0; i < matchPosList.size(); i++) {
			fragments[i].x = matchPosList[i].t;
			fragments[i].y = matchPosList[i].q;
			fragments[i].length = fragments[i].weight = matchPosList[i].w;
			//		cout << fragments[i].x << " " << fragments[i].y << " " << fragments[i].weight << endl;
		}
		cout << "stored a total of: " << fragments.size() << " fragments." << endl;

		int maxFragmentChainLength;
		vector<DNALength> maxFragmentChain;
	
		maxFragmentChainLength = GlobalChain<ChainedFragment, BasicEndpoint<ChainedFragment> >(fragments, maxFragmentChain);
		MatchedAlignment alignment;		
		std::reverse(maxFragmentChain.begin(), maxFragmentChain.end());
		alignment.AllocateBlocks(maxFragmentChain.size());
		for (i = 0; i < maxFragmentChain.size(); i++) {
			alignment.blocks[i].qPos = fragments[maxFragmentChain[i]].y;
			alignment.blocks[i].tPos = fragments[maxFragmentChain[i]].x;
			alignment.blocks[i].length = fragments[maxFragmentChain[i]].length;
			cout << "( " << fragments[maxFragmentChain[i]].x << " "
					 << fragments[maxFragmentChain[i]].y << " "
					 << fragments[maxFragmentChain[i]].length << ") ";
		}
		cout << endl;
		alignment.tStart = alignment.qStart = 0;
		alignment.tPos = alignment.qPos = 0;
		StickPrintAlignment(alignment, query, target, cout);
	}
	return 0;
}
Пример #2
0
//
// Greedily selects a set of mutually non-overlapping matches from lis.
//
// lis is first reordered with SortMatchPosListByWeight; matches are then
// visited in that order and a match is kept only if neither its target
// interval [t, t + GetLength()) nor its query interval
// [q, q + GetLength()) overlaps the corresponding interval of a match
// already kept.
//
//   lis       in/out: candidate matches.  When it holds more than one
//             element it is overwritten with the selected matches,
//             ordered by SortMatchPosList.  With exactly one element it
//             is left as SortMatchPosListByWeight left it.
//   noOvpLis  out: the selected matches are appended here (the vector
//             is not cleared first, so anything already in it also takes
//             part in the overlap checks) and, when lis has more than one
//             element, the whole vector is ordered by SortMatchPosList.
//
// NOTE(review): T_MatchPos is not declared in the visible snippet;
// presumably a template parameter whose header sits just above this
// function -- confirm.
//
void StoreNonOverlappingIndices(std::vector<T_MatchPos> &lis, 
    std::vector<T_MatchPos> &noOvpLis) {
	unsigned int i;

	// do nothing on empty lists
	if (lis.empty()) return;

	//
	// First build a list of matches sorted by weight.
	SortMatchPosListByWeight(lis);

	//
	// The first match is guaranteed to not overlap.
	noOvpLis.push_back(lis[0]);

	//
	// Nothing is overlapping, and everything is sorted when there is 
	// just one value.
	if (lis.size() == 1) return;

	//
	// Next, add matches as long as they do not overlap.
	for (i = 1; i < lis.size(); i++ ){
		VectorIndex j;
		int lts = lis[i].t;
		int lte = lis[i].t + lis[i].GetLength();
		int lqs = lis[i].q;
		int lqe = lis[i].q + lis[i].GetLength();

		bool ovpFound = false;
		for (j = 0; j < noOvpLis.size(); j++ ){
			int tIntvStart = noOvpLis[j].t;
			int tIntvEnd   = noOvpLis[j].t + noOvpLis[j].GetLength();
			int qIntvStart = noOvpLis[j].q;
			int qIntvEnd   = noOvpLis[j].q + noOvpLis[j].GetLength();
			//
			// The candidate overlaps a kept match when, in target or in
			// query coordinates, one of its endpoints falls inside the kept
			// interval, or when it strictly encloses the kept interval.
			// The enclosure case must be tested separately: neither
			// endpoint of the candidate lies inside the kept interval
			// then, so the endpoint tests alone would accept it.
			//
			if ((lts >= tIntvStart and lts < tIntvEnd) or 
					(lte >  tIntvStart and lte <= tIntvEnd) or
					(lts <  tIntvStart and lte >  tIntvEnd) or
					(lqs >= qIntvStart and lqs < qIntvEnd) or
					(lqe >  qIntvStart and lqe <= qIntvEnd) or
					(lqs <  qIntvStart and lqe >  qIntvEnd)) {
				ovpFound = true;
				break;
			}
		}
		if (!ovpFound) {
			noOvpLis.push_back(lis[i]);
		}
	}
	
	//
	// The matches were collected in weight order; put them back into the
	// order SortMatchPosList defines before handing them to the caller.
	//
	SortMatchPosList(noOvpLis);

	//
	// The match pos list was sorted in order of weight. 
	// Just in case it causes problems down the line, replace it with
	// the selected matches and re-sort it with SortMatchPosList.
	//
	lis = noOvpLis;
	SortMatchPosList(lis);
}