int main(int argc, char* argv[]) { string queryFileName, targetFileName; if (argc < 3) { cout << "Usage: guidedalign query target [sdptuple]" << endl; exit(1); } queryFileName = argv[1]; targetFileName = argv[2]; int sdpTupleSize = 4; if (argc > 3) { sdpTupleSize = atoi(argv[3]); } ReaderAgglomerate reader; FASTQSequence query, target; reader.Initialize(queryFileName); reader.GetNext(query); reader.Close(); reader.Initialize(targetFileName); reader.GetNext(target); reader.Close(); int alignScore; /* Alignment sdpAlignment; int nSDPHits = 0; alignScore = SDPAlign(query, target, SMRTDistanceMatrix, 4, 4, sdpTupleSize, 4, 0.90, sdpAlignment, nSDPHits, Local, false, false); int b; for (b = 0; b < sdpAlignment.blocks.size(); b++) { sdpAlignment.blocks[b].qPos += sdpAlignment.qPos; sdpAlignment.blocks[b].tPos += sdpAlignment.tPos; } Guide guide; int bandSize = 16; AlignmentToGuide(sdpAlignment, guide, bandSize); StoreMatrixOffsets(guide); int guideSize = ComputeMatrixNElem(guide); int i; */ vector<int> scoreMat; vector<Arrow> pathMat; vector<double> probMat, optPathProbMat; vector<float> lnSubVect, lnInsVect, lnDelVect, lnMatchVect; // AlignmentCandidate<FASTASequence, FASTASequence> alignment; Alignment alignment; DistanceMatrixScoreFunction<DNASequence, DNASequence> distScoreFn; distScoreFn.del = 3; distScoreFn.ins = 3; distScoreFn.InitializeScoreMatrix(SMRTDistanceMatrix); alignScore = GuidedAlign(query, target, distScoreFn, 10, // in order after edit distance: // pairwise-ins, pairwise-del, k, sdp-ins, sdp-del, sdp-insrate // distScoreFn, 5,5,.15, alignment, Local, false, 8); // StickPrintAlignment(alignment, query, target, cout); }
int main(int argc, char* argv[]) { if (argc < 4) { PrintUsage(); exit(1); } string queryName, targetName; queryName = argv[1]; targetName = argv[2]; TupleMetrics tm; tm.Initialize(atoi(argv[3])); int argi = 4; float indelRate = 0.25; int indel = 3; int match = 0; int printSW = 0; int refineAlignments = 1; int showalign = 0; int fixedTarget = 0; int sdpIndel = indel; int sdpIns = 5; int sdpDel = 5; AlignmentType alignType = Global; while (argi < argc) { if (strcmp(argv[argi], "-indelRate") == 0) { ++argi; indelRate = atof(argv[argi]); } else if (strcmp(argv[argi], "-printsw") == 0) { printSW = 1; } else if (strcmp(argv[argi], "-noRefine") == 0) { refineAlignments = 0; } else if (strcmp(argv[argi], "-indel") == 0) { indel = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-sdpIndel") == 0) { sdpIndel = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-sdpIns") == 0) { sdpIns = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-sdpDel") == 0) { sdpDel = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-showalign") == 0) { showalign = 1; } else if (strcmp(argv[argi], "-local") == 0) { alignType = Local; } else if (strcmp(argv[argi], "-match") == 0) { match = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-fixedtarget") == 0) { fixedTarget = 1; } else { PrintUsage(); cout << "Bad option: " << argv[argi] << endl; exit(1); } ++argi; } FASTASequence query, target; FASTAReader queryReader, targetReader; queryReader.Init(queryName); targetReader.Init(targetName); if (match != 0) { int i; for (i = 0; i < 4; i++ ){ LocalAlignLowMutationMatrix[i][i] = match; } } int seqIndex = 0; Alignment alignment; vector<int> scoreMat; vector<Arrow> pathMat; DistanceMatrixScoreFunction<DNASequence, DNASequence> distScoreFn; distScoreFn.del = indel; distScoreFn.ins = indel; distScoreFn.InitializeScoreMatrix(SMRTDistanceMatrix); if (fixedTarget) { targetReader.GetNext(target); } cout << "qid,tid,qstart,qend,qlen,tstart,tend,tlen,score" << endl; while (queryReader.GetNext(query) and 
(fixedTarget or targetReader.GetNext(target))) { if (query.length == 0 or target.length == 0) continue; alignment.blocks.clear(); int alignScore; alignScore = SDPAlign(query, target, distScoreFn, tm.tupleSize, sdpIndel, sdpIndel, indelRate, alignment, alignType, refineAlignments, false, 0); if (alignScore > 0){ // in rare cases the SDP returns positive. alignScore = 0; // this makes it more like a true local alignment } if (showalign) { StickPrintAlignment(alignment, query, target, cout); } if (printSW) { MatchedAlignment swAlignment; vector<int> scoreMat; vector<Arrow> pathMat; SWAlign(query, target, scoreMat, pathMat, swAlignment, distScoreFn); StickPrintAlignment(swAlignment, query, target, cout); } cout << query.GetName() << "," << target.GetName() << "," << alignment.qPos << "," << alignment.QEnd() << "," << query.length << "," << alignment.tPos << "," << alignment.TEnd() << "," << target.length << "," << alignScore << endl; ++seqIndex; } return 0; }
int main(int argc, char* argv[]) { string fileAName, fileBName; if (argc < 3) { cout << "usage: extendAlign file1 fil2 [pos1 pos2] " << endl; exit(0); } fileAName = argv[1]; fileBName = argv[2]; int argi = 3; int aPos = 0; int bPos = 0; if (argc == 5) { aPos = atoi(argv[3]); bPos = atoi(argv[4]); } ReaderAgglomerate reader; reader.Initialize(fileAName); FASTASequence aSeq, bSeq; reader.GetNext(aSeq); reader.Initialize(fileBName); reader.GetNext(bSeq); DistanceMatrixScoreFunction<FASTASequence, FASTASequence> scoreFn; scoreFn.ins = 3; scoreFn.del = 3; scoreFn.InitializeScoreMatrix(SMRTDistanceMatrix); vector<int> scoreMat; vector<Arrow>pathMat; AlignmentCandidate<FASTASequence, FASTASequence> extendedAlignment; /* ExtendAlignmentForward(aSeq, aPos, bSeq, bPos, 5, //k scoreMat, pathMat, extendedAlignment, scoreFn, 1, // don't bother attempting // to extend the alignment // if one of the sequences // is less than 1 base long 2); extendedAlignment.qAlignedSeq.ReferenceSubstring(aSeq); extendedAlignment.tAlignedSeq.ReferenceSubstring(bSeq); // extendedAlignment.qAlignedSeqPos = aPos; // extendedAlignment.tAlignedSeqPos = bPos; StickPrintAlignment(extendedAlignment, aSeq, bSeq, cout); extendedAlignment.Clear(); */ if (aPos == 0) { aPos = aSeq.length; } if (bPos == 0) { bPos = bSeq.length; } ExtendAlignmentReverse(aSeq, aPos, bSeq, bPos, 5, //k scoreMat, pathMat, extendedAlignment, scoreFn, 1, // don't bother attempting // to extend the alignment // if one of the sequences // is less than 1 base long 2); extendedAlignment.qAlignedSeq.ReferenceSubstring(aSeq); extendedAlignment.tAlignedSeq.ReferenceSubstring(bSeq); // extendedAlignment.qAlignedSeqPos = aPos; // extendedAlignment.tAlignedSeqPos = bPos; StickPrintAlignment(extendedAlignment, aSeq, bSeq, cout); return 0; }
// Scans the genome tuple by tuple and, wherever a genome tuple equals a read
// keyword, band-aligns that read against the surrounding genome window,
// keeping the lowest-scoring alignment seen for each read.
//
// Reads from data: genome, tm (tuple metrics), keywords (searched with
// lower_bound/upper_bound, so they must already be sorted), reads, insRate.
// Writes to data, indexed by read: readOptScore, optAlignment,
// optGenomeAlignPos, optGenomeAlignLength, prevAlignedGenomePos.
// Progress (the genome position) is written to stderr every 1000 positions.
void KeywordSeededAlignment(Data *data) {
  FASTQSequence genomeSubstring;
  ReadKeyword genomeKeyword;
  std::vector<ReadKeyword>::iterator keyIt, upKeyIt;

  // Score/path buffers handed to KBandAlign on every call.
  vector<Arrow> insPathMat;
  vector<int> insScoreMat;

  DistanceMatrixScoreFunction<DNASequence, FASTQSequence> distanceMatrixScoreFn;
  distanceMatrixScoreFn.InitializeScoreMatrix(SMRTDistanceMatrix);
  distanceMatrixScoreFn.del = 6;
  distanceMatrixScoreFn.ins = 6;

  // A genome shorter than one tuple contains no tuple to look up. Return
  // before computing the loop bound: length - tupleSize + 1 would wrap around
  // if DNALength is an unsigned type (assumed; its definition is not visible
  // here), and the scan would then run far past the end of the genome.
  const DNALength tupleSize = data->tm->tupleSize;
  if (data->genome->length < tupleSize) {
    return;
  }
  const DNALength numTupleStarts = data->genome->length - tupleSize + 1;

  //
  // Scan the genome.
  //
  DNALength genomePos;
  for (genomePos = 0; genomePos < numTupleStarts; genomePos++) {
    genomeKeyword.tuple.FromStringLR(&data->genome->seq[genomePos], *data->tm);

    // [keyIt, upKeyIt) is the run of keywords equal to this genome tuple.
    keyIt = lower_bound(data->keywords->begin(), data->keywords->end(),
                        genomeKeyword);
    upKeyIt = upper_bound(data->keywords->begin(), data->keywords->end(),
                          genomeKeyword);

    //
    // Find all the reads and all the positions in reads that
    // have this keyword.
    //
    for (; keyIt != upKeyIt; keyIt++) {
      const int readIndex = (*keyIt).readIndex;

      // Offset of the keyword in the read and the read length, both scaled
      // by insRate, give the window start offset and the window length.
      DNALength prefixLength = (*keyIt).readPos * data->insRate;
      DNALength substringLength =
          (*data->reads)[readIndex].length * data->insRate;

      // The window starts prefixLength before the keyword hit, or at the
      // start of the genome when the hit lies within the first
      // substringLength bases.
      DNALength substringPos;
      if (genomePos < substringLength) {
        substringPos = 0;
      }
      else {
        substringPos = genomePos - prefixLength;
      }

      //
      // Do not bother aligning the read again if it aligns to the same
      // position.
      //
      if ((*data->prevAlignedGenomePos)[readIndex] == substringPos) continue;

      // Trim the window so that it ends at the end of the genome.
      if (substringPos + substringLength > data->genome->length) {
        substringLength = data->genome->length - substringPos;
      }

      // The window points into the genome buffer rather than copying it.
      genomeSubstring.seq = &data->genome->seq[substringPos];
      genomeSubstring.length = substringLength;

      FastqAlignment alignment;
      int alignScore;
      alignScore = KBandAlign((*data->reads)[readIndex], genomeSubstring,
                              SMRTDistanceMatrix,
                              6, // ins
                              6, // del
                              0.30 * (*data->reads)[readIndex].length,
                              insScoreMat, insPathMat,
                              alignment, distanceMatrixScoreFn, QueryFit);

      // Lower scores win: record this alignment only when it beats the best
      // score stored for the read so far.
      if (alignScore < (*data->readOptScore)[readIndex]) {
        (*data->readOptScore)[readIndex] = alignScore;
        (*data->optAlignment)[readIndex] = alignment;
        (*data->optAlignment)[readIndex].tAlignedSeqPos = substringPos;
        (*data->optGenomeAlignPos)[readIndex] = substringPos;
        (*data->optGenomeAlignLength)[readIndex] = substringLength;
      }
      (*data->prevAlignedGenomePos)[readIndex] = substringPos;

      // Disabled debug trace:
      //   cout << genomePos << " read: " << readIndex << " readpos: "
      //        << (*keyIt).readPos << " score " << alignScore << endl;
    }

    if (genomePos % 1000 == 0) {
      cerr << genomePos << endl;
    }
  }
}