int main(int argc, char* argv[]) {
	
	string queryFileName, targetFileName;
	if (argc < 3) {
		cout << "Usage: guidedalign query target [sdptuple]" << endl;
		exit(1);
	}
	queryFileName = argv[1];
	targetFileName = argv[2];
	int sdpTupleSize = 4;
	if (argc > 3) {
		sdpTupleSize = atoi(argv[3]);
	}
	
	ReaderAgglomerate reader;
	FASTQSequence query, target;

	reader.Initialize(queryFileName);
	reader.GetNext(query);
	reader.Close();
	reader.Initialize(targetFileName);
	reader.GetNext(target);
	reader.Close();
	
	int alignScore;
	/*
	Alignment sdpAlignment;
	int nSDPHits = 0;
	alignScore = SDPAlign(query, target,
												SMRTDistanceMatrix, 
												4, 4, sdpTupleSize, 4, 0.90,
												sdpAlignment, nSDPHits, Local, false, false);
	int b;
	for (b = 0; b < sdpAlignment.blocks.size(); b++) {
		sdpAlignment.blocks[b].qPos += sdpAlignment.qPos;
		sdpAlignment.blocks[b].tPos += sdpAlignment.tPos;
		}
	Guide guide;
	int bandSize = 16;
	AlignmentToGuide(sdpAlignment, guide, bandSize);
	StoreMatrixOffsets(guide);
	int guideSize = ComputeMatrixNElem(guide);
	int i;
	*/

	vector<int> scoreMat;
	vector<Arrow> pathMat;
	vector<double> probMat, optPathProbMat;
  vector<float> lnSubVect, lnInsVect, lnDelVect, lnMatchVect;
  //	AlignmentCandidate<FASTASequence, FASTASequence> alignment;
  Alignment alignment;
	DistanceMatrixScoreFunction<DNASequence, DNASequence> distScoreFn;
	distScoreFn.del = 3;
	distScoreFn.ins = 3;
	distScoreFn.InitializeScoreMatrix(SMRTDistanceMatrix);

	alignScore = GuidedAlign(query, target, distScoreFn, 10,
                           // in order after edit distance:
                           // pairwise-ins, pairwise-del, k, sdp-ins, sdp-del, sdp-insrate
                           //                           distScoreFn, 

                           5,5,.15,
                           alignment, Local, false, 8);
  //	StickPrintAlignment(alignment, query, target, cout);
}
Beispiel #2
0
int main(int argc, char* argv[]) {
    if (argc < 4) {
        PrintUsage();
        exit(1);
    }

    string queryName, targetName;
    queryName = argv[1];
    targetName = argv[2];
    TupleMetrics tm;
    tm.Initialize(atoi(argv[3]));
    int argi = 4;
    float indelRate = 0.25;
    int indel = 3;
    int match = 0;
    int printSW = 0;
    int refineAlignments = 1;
    int showalign = 0;
    int fixedTarget = 0;
    int sdpIndel = indel;
    int sdpIns = 5;
    int sdpDel = 5;
    AlignmentType alignType = Global;
    while (argi < argc) {
        if (strcmp(argv[argi], "-indelRate") == 0) {
            ++argi;
            indelRate = atof(argv[argi]);
        }
        else if (strcmp(argv[argi], "-printsw") == 0) {
            printSW = 1;
        }
        else if (strcmp(argv[argi], "-noRefine") == 0) {
            refineAlignments = 0;
        }
        else if (strcmp(argv[argi], "-indel") == 0) {
            indel = atoi(argv[++argi]);
        }
        else if (strcmp(argv[argi], "-sdpIndel") == 0) {
            sdpIndel = atoi(argv[++argi]);
        }
        else if (strcmp(argv[argi], "-sdpIns") == 0) {
            sdpIns  = atoi(argv[++argi]);
        }
        else if (strcmp(argv[argi], "-sdpDel") == 0) {
            sdpDel  = atoi(argv[++argi]);
        }
        else if (strcmp(argv[argi], "-showalign") == 0) {
            showalign = 1;
        }
        else if (strcmp(argv[argi], "-local") == 0) {
            alignType = Local;
        }
        else if (strcmp(argv[argi], "-match") == 0) {
            match = atoi(argv[++argi]);
        }
        else if (strcmp(argv[argi], "-fixedtarget") == 0) {
            fixedTarget = 1;
        }
        else {
            PrintUsage();
            cout << "Bad option: " << argv[argi] << endl;
            exit(1);
        }
        ++argi;
    }

    FASTASequence query, target;
    FASTAReader queryReader, targetReader;
    queryReader.Init(queryName);
    
    targetReader.Init(targetName);

    if (match != 0) {
        int i;
        for (i = 0; i < 4; i++ ){
            LocalAlignLowMutationMatrix[i][i] = match;
        }
    }

    int seqIndex = 0;
    Alignment alignment;
    vector<int> scoreMat;
    vector<Arrow> pathMat;
    DistanceMatrixScoreFunction<DNASequence, DNASequence> distScoreFn;
    distScoreFn.del = indel;
    distScoreFn.ins = indel;
    distScoreFn.InitializeScoreMatrix(SMRTDistanceMatrix);

    if (fixedTarget) {
        targetReader.GetNext(target);
    }

    cout << "qid,tid,qstart,qend,qlen,tstart,tend,tlen,score" << endl;
    while (queryReader.GetNext(query) and 
           (fixedTarget or targetReader.GetNext(target))) {
        
        if (query.length == 0 or target.length == 0)
            continue; 
        alignment.blocks.clear();

        int alignScore;
        alignScore = SDPAlign(query, target,
                              distScoreFn, tm.tupleSize, 
                              sdpIndel, sdpIndel, indelRate, 
                              alignment, 
                              alignType,
                              refineAlignments,
                              false,
                              0);

        if (alignScore > 0){ // in rare cases the SDP returns positive. 
            alignScore = 0;  // this makes it more like a true local alignment
        }                   

        if (showalign) {
            StickPrintAlignment(alignment, query, target, cout);
        }

        if (printSW) {
            MatchedAlignment swAlignment;
            vector<int> scoreMat;
            vector<Arrow> pathMat;
            SWAlign(query, target, scoreMat, pathMat, swAlignment, distScoreFn);        
            StickPrintAlignment(swAlignment, query, target, cout);
        }

        cout << query.GetName()  << "," << target.GetName() << "," 
             << alignment.qPos << "," << alignment.QEnd()   << "," 
             << query.length  << "," << alignment.tPos << "," 
             << alignment.TEnd()   << "," << target.length << "," 
             << alignScore << endl;

        ++seqIndex;
    }

    return 0;
}
Beispiel #3
0
int main(int argc, char* argv[]) {
	string fileAName, fileBName;
	if (argc < 3) {
		cout << "usage: extendAlign file1 fil2 [pos1 pos2] " << endl;
		exit(0);
	}

	fileAName = argv[1];
	fileBName = argv[2];
	int argi = 3;
	int aPos = 0;
	int bPos = 0;
	if (argc == 5) {
		aPos = atoi(argv[3]);
		bPos = atoi(argv[4]);
	}
	
	ReaderAgglomerate reader;
	reader.Initialize(fileAName);
	
	FASTASequence aSeq, bSeq;
	reader.GetNext(aSeq);
	reader.Initialize(fileBName);
	reader.GetNext(bSeq);
	
	DistanceMatrixScoreFunction<FASTASequence, FASTASequence> scoreFn;
	scoreFn.ins = 3;
	scoreFn.del = 3;
	scoreFn.InitializeScoreMatrix(SMRTDistanceMatrix);

	vector<int>  scoreMat;
	vector<Arrow>pathMat;
	
	AlignmentCandidate<FASTASequence, FASTASequence> extendedAlignment;

	/*	ExtendAlignmentForward(aSeq, aPos,
												 bSeq, bPos,
												 5, //k
												 scoreMat, pathMat,
												 extendedAlignment,
												 scoreFn,
												 1, // don't bother attempting
												 // to extend the alignment
												 // if one of the sequences
												 // is less than 1 base long
												 2);

	extendedAlignment.qAlignedSeq.ReferenceSubstring(aSeq);
	extendedAlignment.tAlignedSeq.ReferenceSubstring(bSeq);

	//	extendedAlignment.qAlignedSeqPos = aPos;
	//	extendedAlignment.tAlignedSeqPos = bPos;

	StickPrintAlignment(extendedAlignment, aSeq, bSeq, cout);
	extendedAlignment.Clear();
	*/
	if (aPos == 0) { aPos = aSeq.length; }
	if (bPos == 0) { bPos = bSeq.length; }

	ExtendAlignmentReverse(aSeq, aPos,
												 bSeq, bPos,
												 5, //k
												 scoreMat, pathMat,
												 extendedAlignment,
												 scoreFn,
												 1, // don't bother attempting
												 // to extend the alignment
												 // if one of the sequences
												 // is less than 1 base long
												 2);

	extendedAlignment.qAlignedSeq.ReferenceSubstring(aSeq);
	extendedAlignment.tAlignedSeq.ReferenceSubstring(bSeq);

	//	extendedAlignment.qAlignedSeqPos = aPos;
	//	extendedAlignment.tAlignedSeqPos = bPos;

	StickPrintAlignment(extendedAlignment, aSeq, bSeq, cout);

	return 0;
}
Beispiel #4
0
/**
 * Scans the genome one tuple at a time and, for every read keyword equal to
 * the tuple at the current genome position, aligns that read against a
 * genome window around the hit with KBandAlign.
 *
 * Results are written through `data`:
 *  - readOptScore / optAlignment / optGenomeAlignPos / optGenomeAlignLength
 *    are updated whenever a read obtains a strictly lower score than the
 *    one stored for it (lower is treated as better here);
 *  - prevAlignedGenomePos records the window start last aligned for each
 *    read, so an identical window is not aligned again immediately.
 *
 * data->keywords is searched with lower_bound/upper_bound, so it has to be
 * sorted consistently with ReadKeyword's operator<.
 *
 * The current genome position is written to stderr every 1000 positions as
 * a progress indicator.
 *
 * @param data  Shared inputs (genome, tm, keywords, reads, insRate) and the
 *              per-read output arrays listed above.
 */
void KeywordSeededAlignment(Data *data) {
	// A genome shorter than one tuple contains no tuples. Return before the
	// loop: if the length type is unsigned, the bound
	// `length - tupleSize + 1` would wrap around to a huge value and the
	// scan would run far past the end of the genome buffer.
	if (data->genome->length < data->tm->tupleSize) {
		return;
	}

	FASTQSequence genomeSubstring;
	ReadKeyword genomeKeyword;
	std::vector<ReadKeyword>::iterator keyIt, upKeyIt;

	// Score/path matrices handed to KBandAlign. They are declared outside
	// the loops so the same two vectors are passed to every call.
	vector<Arrow> insPathMat;
	vector<int> insScoreMat;

	DistanceMatrixScoreFunction<DNASequence, FASTQSequence> distanceMatrixScoreFn;
	distanceMatrixScoreFn.InitializeScoreMatrix(SMRTDistanceMatrix);
	distanceMatrixScoreFn.del = 6;
	distanceMatrixScoreFn.ins = 6;

	//
	// Scan the genome.
	//
	DNALength genomePos;
	for (genomePos = 0; genomePos < data->genome->length - data->tm->tupleSize + 1; genomePos++) {
		genomeKeyword.tuple.FromStringLR(&data->genome->seq[genomePos], *data->tm);
		keyIt   = lower_bound(data->keywords->begin(), data->keywords->end(), genomeKeyword);
		upKeyIt = upper_bound(data->keywords->begin(), data->keywords->end(), genomeKeyword);

		//
		// Visit every (read, read position) whose keyword equals the
		// tuple at this genome position.
		//
		for (; keyIt != upKeyIt; ++keyIt) {
			int readIndex = (*keyIt).readIndex;

			// Both lengths are scaled by insRate. The window is one
			// scaled read length long and starts one scaled prefix
			// before the hit.
			DNALength prefixLength = (*keyIt).readPos * data->insRate;
			DNALength substringLength = (*data->reads)[readIndex].length * data->insRate;

			// Hits less than one scaled read length from the genome
			// start use a window that begins at position 0.
			DNALength substringPos;
			if (genomePos < substringLength) {
				substringPos = 0;
			}
			else {
				substringPos = genomePos - prefixLength;
			}

			//
			// Do not bother aligning the read again if it aligns to
			// the same position.
			//
			if ((*data->prevAlignedGenomePos)[readIndex] == substringPos)
				continue;

			// Truncate the window at the end of the genome.
			if (substringPos + substringLength > data->genome->length) {
				substringLength = data->genome->length - substringPos;
			}

			// genomeSubstring points into the genome buffer; it is not
			// a copy.
			genomeSubstring.seq = &data->genome->seq[substringPos];
			genomeSubstring.length = substringLength;

			FastqAlignment alignment;
			int alignScore;
			alignScore = KBandAlign((*data->reads)[readIndex], genomeSubstring, SMRTDistanceMatrix,
			                        6, // ins
			                        6, // del
			                        0.30*(*data->reads)[readIndex].length,
			                        insScoreMat, insPathMat,
			                        alignment, distanceMatrixScoreFn, QueryFit);

			// Keep this alignment only if it beats the stored score.
			if (alignScore < (*data->readOptScore)[readIndex]) {
				(*data->readOptScore)[readIndex] = alignScore;
				(*data->optAlignment)[readIndex] = alignment;
				(*data->optAlignment)[readIndex].tAlignedSeqPos = substringPos;
				(*data->optGenomeAlignPos)[readIndex] = substringPos;
				(*data->optGenomeAlignLength)[readIndex] = substringLength;
			}
			(*data->prevAlignedGenomePos)[readIndex] = substringPos;
			/*
				cout << genomePos << " read: " << readIndex
				<< " readpos: " << (*keyIt).readPos << " score " << alignScore << endl;
			*/
		}

		// Progress indicator.
		if (genomePos % 1000 == 0) {
			cerr << genomePos << endl;
		}
	}
}