int main(int argc, char* argv[]) { string fileAName, fileBName; if (argc < 3) { cout << "usage: extendAlign file1 fil2 [pos1 pos2] " << endl; exit(0); } fileAName = argv[1]; fileBName = argv[2]; int argi = 3; int aPos = 0; int bPos = 0; if (argc == 5) { aPos = atoi(argv[3]); bPos = atoi(argv[4]); } ReaderAgglomerate reader; reader.Initialize(fileAName); FASTASequence aSeq, bSeq; reader.GetNext(aSeq); reader.Initialize(fileBName); reader.GetNext(bSeq); DistanceMatrixScoreFunction<FASTASequence, FASTASequence> scoreFn; scoreFn.ins = 3; scoreFn.del = 3; scoreFn.InitializeScoreMatrix(SMRTDistanceMatrix); vector<int> scoreMat; vector<Arrow>pathMat; AlignmentCandidate<FASTASequence, FASTASequence> extendedAlignment; /* ExtendAlignmentForward(aSeq, aPos, bSeq, bPos, 5, //k scoreMat, pathMat, extendedAlignment, scoreFn, 1, // don't bother attempting // to extend the alignment // if one of the sequences // is less than 1 base long 2); extendedAlignment.qAlignedSeq.ReferenceSubstring(aSeq); extendedAlignment.tAlignedSeq.ReferenceSubstring(bSeq); // extendedAlignment.qAlignedSeqPos = aPos; // extendedAlignment.tAlignedSeqPos = bPos; StickPrintAlignment(extendedAlignment, aSeq, bSeq, cout); extendedAlignment.Clear(); */ if (aPos == 0) { aPos = aSeq.length; } if (bPos == 0) { bPos = bSeq.length; } ExtendAlignmentReverse(aSeq, aPos, bSeq, bPos, 5, //k scoreMat, pathMat, extendedAlignment, scoreFn, 1, // don't bother attempting // to extend the alignment // if one of the sequences // is less than 1 base long 2); extendedAlignment.qAlignedSeq.ReferenceSubstring(aSeq); extendedAlignment.tAlignedSeq.ReferenceSubstring(bSeq); // extendedAlignment.qAlignedSeqPos = aPos; // extendedAlignment.tAlignedSeqPos = bPos; StickPrintAlignment(extendedAlignment, aSeq, bSeq, cout); return 0; }
int main(int argc, char* argv[]) { CommandLineParser clp; string cmpFileName; vector<int> holeNumbers; vector<string> patterns, refGroups; bool printAll = false; clp.RegisterStringOption("cmph5filename", &cmpFileName, "input cmp h5", false); clp.RegisterPreviousFlagsAsHidden(); clp.RegisterIntListOption("holeNumbers", &holeNumbers, "hole numbers to print alignments", false); clp.RegisterStringListOption("pattern", &patterns, "patterns to search read names to print alignments", false); clp.RegisterFlagOption("all", &printAll, "Just print all alignments.", false); clp.RegisterStringListOption("refgroups", &refGroups, "Reference groups to print.", false); clp.ParseCommandLine(argc, argv); CmpFile cmpFile; /* * These readers pull information from the same pls file. */ HDFCmpFile<CmpAlignment> hdfcmpFile; if (hdfcmpFile.Initialize(cmpFileName) == 0) { cout << "ERROR, could not open the cmp file." << endl; exit(1); } hdfcmpFile.Read(cmpFile); int alignmentIndex; for (alignmentIndex = 0; alignmentIndex < cmpFile.alnInfo.alignments.size(); alignmentIndex++) { int alnHoleNumber; alnHoleNumber = cmpFile.alnInfo.alignments[alignmentIndex].GetHoleNumber(); int hi; bool printThisAlignment = false; // // Read the alignment string. All alignments // int refGroupId = cmpFile.alnInfo.alignments[alignmentIndex].GetRefGroupId(); int alnGroupId = cmpFile.alnInfo.alignments[alignmentIndex].GetAlnGroupId(); int refGroupIndex = hdfcmpFile.refGroupIdToArrayIndex[refGroupId]; string readGroupName = hdfcmpFile.alnGroupIdToReadGroupName[alnGroupId]; int readGroupIndex = hdfcmpFile.refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName]; string refGroupPath = cmpFile.refGroup.path[refGroupIndex]; for (hi = 0; hi < holeNumbers.size(); hi++) { if (alnHoleNumber == holeNumbers[hi]) { printThisAlignment = true; break; } } int ri; for (ri = 0; ri < refGroups.size(); ri++) { if (refGroups[ri] == refGroupPath) { printThisAlignment = true; break; } } if (printThisAlignment or printAll) { unsigned int alignStartIndex, alignEndIndex; UInt offsetBegin, offsetEnd; string refSequence; string readSequence; vector<unsigned char> byteAlignment; offsetBegin = cmpFile.alnInfo.alignments[alignmentIndex].GetOffsetBegin(); offsetEnd = cmpFile.alnInfo.alignments[alignmentIndex].GetOffsetEnd(); int alignedSequenceLength = offsetEnd - offsetBegin; if (alignedSequenceLength >= 0) { refSequence.resize(alignedSequenceLength); byteAlignment.resize(alignedSequenceLength); } hdfcmpFile.refAlignGroups[refGroupIndex]->readGroups[readGroupIndex]->alignmentArray.Read(offsetBegin, offsetEnd, &byteAlignment[0]); readSequence.resize(byteAlignment.size()); refSequence.resize(byteAlignment.size()); ByteAlignmentToQueryString(&byteAlignment[0], byteAlignment.size(), &readSequence[0]); ByteAlignmentToRefString(&byteAlignment[0], byteAlignment.size(), &refSequence[0]); string ungappedRead, ungappedRef; RemoveGaps(readSequence, ungappedRead); RemoveGaps(refSequence, ungappedRef); Alignment alignment; GappedStringsToAlignment(readSequence, refSequence, alignment); DNASequence qAlignedSeq, rAlignedSeq; qAlignedSeq.seq = (Nucleotide*) &ungappedRead[0]; qAlignedSeq.length = ungappedRead.size(); rAlignedSeq.seq = (Nucleotide*) &ungappedRef[0]; rAlignedSeq.length = ungappedRef.size(); int qStart = cmpFile.alnInfo.alignments[alignmentIndex].GetQueryStart(); int tStart = cmpFile.alnInfo.alignments[alignmentIndex].GetRefStart(); stringstream sstrm; sstrm << alnHoleNumber << "/" << qStart << "_" << cmpFile.alnInfo.alignments[alignmentIndex].GetQueryEnd(); alignment.qName = sstrm.str(); StickPrintAlignment(alignment, qAlignedSeq, rAlignedSeq, cout, qStart, tStart); } } }
int main(int argc, char* argv[]) { if (argc < 4) { PrintUsage(); exit(1); } string queryName, targetName; queryName = argv[1]; targetName = argv[2]; TupleMetrics tm; tm.Initialize(atoi(argv[3])); int argi = 4; float indelRate = 0.25; int indel = 3; int match = 0; int printSW = 0; int refineAlignments = 1; int showalign = 0; int fixedTarget = 0; int sdpIndel = indel; int sdpIns = 5; int sdpDel = 5; AlignmentType alignType = Global; while (argi < argc) { if (strcmp(argv[argi], "-indelRate") == 0) { ++argi; indelRate = atof(argv[argi]); } else if (strcmp(argv[argi], "-printsw") == 0) { printSW = 1; } else if (strcmp(argv[argi], "-noRefine") == 0) { refineAlignments = 0; } else if (strcmp(argv[argi], "-indel") == 0) { indel = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-sdpIndel") == 0) { sdpIndel = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-sdpIns") == 0) { sdpIns = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-sdpDel") == 0) { sdpDel = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-showalign") == 0) { showalign = 1; } else if (strcmp(argv[argi], "-local") == 0) { alignType = Local; } else if (strcmp(argv[argi], "-match") == 0) { match = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-fixedtarget") == 0) { fixedTarget = 1; } else { PrintUsage(); cout << "Bad option: " << argv[argi] << endl; exit(1); } ++argi; } FASTASequence query, target; FASTAReader queryReader, targetReader; queryReader.Init(queryName); targetReader.Init(targetName); if (match != 0) { int i; for (i = 0; i < 4; i++ ){ LocalAlignLowMutationMatrix[i][i] = match; } } int seqIndex = 0; Alignment alignment; vector<int> scoreMat; vector<Arrow> pathMat; DistanceMatrixScoreFunction<DNASequence, DNASequence> distScoreFn; distScoreFn.del = indel; distScoreFn.ins = indel; distScoreFn.InitializeScoreMatrix(SMRTDistanceMatrix); if (fixedTarget) { targetReader.GetNext(target); } cout << "qid,tid,qstart,qend,qlen,tstart,tend,tlen,score" << endl; while (queryReader.GetNext(query) and (fixedTarget or targetReader.GetNext(target))) { if (query.length == 0 or target.length == 0) continue; alignment.blocks.clear(); int alignScore; alignScore = SDPAlign(query, target, distScoreFn, tm.tupleSize, sdpIndel, sdpIndel, indelRate, alignment, alignType, refineAlignments, false, 0); if (alignScore > 0){ // in rare cases the SDP returns positive. alignScore = 0; // this makes it more like a true local alignment } if (showalign) { StickPrintAlignment(alignment, query, target, cout); } if (printSW) { MatchedAlignment swAlignment; vector<int> scoreMat; vector<Arrow> pathMat; SWAlign(query, target, scoreMat, pathMat, swAlignment, distScoreFn); StickPrintAlignment(swAlignment, query, target, cout); } cout << query.GetName() << "," << target.GetName() << "," << alignment.qPos << "," << alignment.QEnd() << "," << query.length << "," << alignment.tPos << "," << alignment.TEnd() << "," << target.length << "," << alignScore << endl; ++seqIndex; } return 0; }
int main(int argc, char* argv[]) { if (argc < 3) { cout << "usage: samatcher queryfile targetfile" <<endl; exit(1); } string queryFileName, targetFileName; int minMatchLength = 5; int maxExpand = 0; queryFileName = argv[1]; targetFileName = argv[2]; int argi = 3; AnchorParameters anchorParams; while (argi < argc) { if (strcmp(argv[argi], "-minmatch") == 0) { anchorParams.minMatchLength = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-maxexpand") == 0) { anchorParams.expand = atoi(argv[++argi]); } else { cout << "ERROR! Invalid argument: " << argv[argi]<< endl; exit(1); } ++argi; } FASTQSequence query, target; FASTAReader queryReader, targetReader; queryReader.Init(queryFileName); targetReader.Init(targetFileName); while(1) { if (!queryReader.GetNext(query)) break; if (!targetReader.GetNext(target)) break; query.ToUpper(); target.ToUpper(); // // Build the suffix array on the target. // DNASuffixArray sarray; target.ToThreeBit(); vector<int> alphabet; sarray.InitThreeBitDNAAlphabet(alphabet); sarray.LarssonBuildSuffixArray(target.seq, target.length, alphabet); cout <<"done building suffix array." << endl; target.ToAscii(); // // Find the list of anchors. // query.PrintSeq(cout); cout << "target: " << endl; target.PrintSeq(cout); MatchPosList matchPosList; int numKeysMatched; anchorParams.useLookupTable = false; numKeysMatched = MapReadToGenome(target, sarray, query, sarray.lookupPrefixLength, matchPosList, anchorParams); // // Now, convert the matchPosList to a set of fragments // that can be used in the sdp. // SortMatchPosList(matchPosList); vector<ChainedFragment> fragments; fragments.resize(matchPosList.size()); VectorIndex i; for (i = 0; i < matchPosList.size(); i++) { fragments[i].x = matchPosList[i].t; fragments[i].y = matchPosList[i].q; fragments[i].length = fragments[i].weight = matchPosList[i].w; // cout << fragments[i].x << " " << fragments[i].y << " " << fragments[i].weight << endl; } cout << "stored a total of: " << fragments.size() << " fragments." << endl; int maxFragmentChainLength; vector<DNALength> maxFragmentChain; maxFragmentChainLength = GlobalChain<ChainedFragment, BasicEndpoint<ChainedFragment> >(fragments, maxFragmentChain); MatchedAlignment alignment; std::reverse(maxFragmentChain.begin(), maxFragmentChain.end()); alignment.AllocateBlocks(maxFragmentChain.size()); for (i = 0; i < maxFragmentChain.size(); i++) { alignment.blocks[i].qPos = fragments[maxFragmentChain[i]].y; alignment.blocks[i].tPos = fragments[maxFragmentChain[i]].x; alignment.blocks[i].length = fragments[maxFragmentChain[i]].length; cout << "( " << fragments[maxFragmentChain[i]].x << " " << fragments[maxFragmentChain[i]].y << " " << fragments[maxFragmentChain[i]].length << ") "; } cout << endl; alignment.tStart = alignment.qStart = 0; alignment.tPos = alignment.qPos = 0; StickPrintAlignment(alignment, query, target, cout); } return 0; }