Exemple #1
0
int main(int argc, char* argv[]) {
	string fileAName, fileBName;
	if (argc < 3) {
		cout << "usage: extendAlign file1 fil2 [pos1 pos2] " << endl;
		exit(0);
	}

	fileAName = argv[1];
	fileBName = argv[2];
	int argi = 3;
	int aPos = 0;
	int bPos = 0;
	if (argc == 5) {
		aPos = atoi(argv[3]);
		bPos = atoi(argv[4]);
	}
	
	ReaderAgglomerate reader;
	reader.Initialize(fileAName);
	
	FASTASequence aSeq, bSeq;
	reader.GetNext(aSeq);
	reader.Initialize(fileBName);
	reader.GetNext(bSeq);
	
	DistanceMatrixScoreFunction<FASTASequence, FASTASequence> scoreFn;
	scoreFn.ins = 3;
	scoreFn.del = 3;
	scoreFn.InitializeScoreMatrix(SMRTDistanceMatrix);

	vector<int>  scoreMat;
	vector<Arrow>pathMat;
	
	AlignmentCandidate<FASTASequence, FASTASequence> extendedAlignment;

	/*	ExtendAlignmentForward(aSeq, aPos,
												 bSeq, bPos,
												 5, //k
												 scoreMat, pathMat,
												 extendedAlignment,
												 scoreFn,
												 1, // don't bother attempting
												 // to extend the alignment
												 // if one of the sequences
												 // is less than 1 base long
												 2);

	extendedAlignment.qAlignedSeq.ReferenceSubstring(aSeq);
	extendedAlignment.tAlignedSeq.ReferenceSubstring(bSeq);

	//	extendedAlignment.qAlignedSeqPos = aPos;
	//	extendedAlignment.tAlignedSeqPos = bPos;

	StickPrintAlignment(extendedAlignment, aSeq, bSeq, cout);
	extendedAlignment.Clear();
	*/
	if (aPos == 0) { aPos = aSeq.length; }
	if (bPos == 0) { bPos = bSeq.length; }

	ExtendAlignmentReverse(aSeq, aPos,
												 bSeq, bPos,
												 5, //k
												 scoreMat, pathMat,
												 extendedAlignment,
												 scoreFn,
												 1, // don't bother attempting
												 // to extend the alignment
												 // if one of the sequences
												 // is less than 1 base long
												 2);

	extendedAlignment.qAlignedSeq.ReferenceSubstring(aSeq);
	extendedAlignment.tAlignedSeq.ReferenceSubstring(bSeq);

	//	extendedAlignment.qAlignedSeqPos = aPos;
	//	extendedAlignment.tAlignedSeqPos = bPos;

	StickPrintAlignment(extendedAlignment, aSeq, bSeq, cout);

	return 0;
}
Exemple #2
0
int main(int argc, char* argv[]) {


	CommandLineParser clp;
	string cmpFileName;
	vector<int> holeNumbers;
	vector<string> patterns, refGroups;
  bool printAll = false;
	clp.RegisterStringOption("cmph5filename", &cmpFileName, "input cmp h5", false);
	clp.RegisterPreviousFlagsAsHidden();
	clp.RegisterIntListOption("holeNumbers", &holeNumbers, "hole numbers to print alignments", false);
	clp.RegisterStringListOption("pattern", &patterns, "patterns to search read names to print alignments", false);	
  clp.RegisterFlagOption("all", &printAll, "Just print all alignments.", false);
  clp.RegisterStringListOption("refgroups", &refGroups, "Reference groups to print.", false);
	clp.ParseCommandLine(argc, argv);

	
	CmpFile cmpFile;
	
	/*
	 * These readers pull information from the same pls file.
	 */
	HDFCmpFile<CmpAlignment> hdfcmpFile;

	if (hdfcmpFile.Initialize(cmpFileName) == 0) {
		cout << "ERROR, could not open the cmp file." << endl;
		exit(1);
	}
	
	hdfcmpFile.Read(cmpFile);
	
	int alignmentIndex;
	for (alignmentIndex = 0; alignmentIndex < cmpFile.alnInfo.alignments.size(); alignmentIndex++) {
		int alnHoleNumber;
		alnHoleNumber = cmpFile.alnInfo.alignments[alignmentIndex].GetHoleNumber();
		int hi;
    bool printThisAlignment = false;

    //
    // Read the alignment string.  All alignments 
    //
    int refGroupId = cmpFile.alnInfo.alignments[alignmentIndex].GetRefGroupId();
    int alnGroupId  = cmpFile.alnInfo.alignments[alignmentIndex].GetAlnGroupId();

    int refGroupIndex = hdfcmpFile.refGroupIdToArrayIndex[refGroupId];
    string readGroupName = hdfcmpFile.alnGroupIdToReadGroupName[alnGroupId];
    int readGroupIndex = hdfcmpFile.refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName];

    string refGroupPath = cmpFile.refGroup.path[refGroupIndex];

		for (hi = 0; hi < holeNumbers.size(); hi++) {
			if (alnHoleNumber == holeNumbers[hi]) {
        printThisAlignment = true;
        break;
      }
    }
    int ri;
    for (ri = 0; ri < refGroups.size(); ri++) {
      if (refGroups[ri] == refGroupPath) {
        printThisAlignment = true;
        break;
      }
    }


    if (printThisAlignment or printAll) {
      unsigned int alignStartIndex, alignEndIndex;
      UInt offsetBegin, offsetEnd;
		
      string   refSequence;
      string   readSequence;
      vector<unsigned char> byteAlignment;

      offsetBegin = cmpFile.alnInfo.alignments[alignmentIndex].GetOffsetBegin();
      offsetEnd   = cmpFile.alnInfo.alignments[alignmentIndex].GetOffsetEnd();
      int alignedSequenceLength = offsetEnd - offsetBegin;
      if (alignedSequenceLength >= 0) {
        refSequence.resize(alignedSequenceLength);
        byteAlignment.resize(alignedSequenceLength);
      }
	
      
      hdfcmpFile.refAlignGroups[refGroupIndex]->readGroups[readGroupIndex]->alignmentArray.Read(offsetBegin, 
                                                                                               offsetEnd, 
                                                                                               &byteAlignment[0]);

      readSequence.resize(byteAlignment.size());
      refSequence.resize(byteAlignment.size());

      ByteAlignmentToQueryString(&byteAlignment[0], byteAlignment.size(), &readSequence[0]);
      ByteAlignmentToRefString(&byteAlignment[0], byteAlignment.size(), &refSequence[0]);				
      string ungappedRead, ungappedRef;
      RemoveGaps(readSequence, ungappedRead);
      RemoveGaps(refSequence, ungappedRef);
      Alignment alignment;
      GappedStringsToAlignment(readSequence, refSequence, alignment);
      DNASequence qAlignedSeq, rAlignedSeq;
      qAlignedSeq.seq = (Nucleotide*) &ungappedRead[0];
      qAlignedSeq.length = ungappedRead.size();
      rAlignedSeq.seq = (Nucleotide*) &ungappedRef[0];
      rAlignedSeq.length = ungappedRef.size();
				
      int qStart = cmpFile.alnInfo.alignments[alignmentIndex].GetQueryStart();
      int tStart = cmpFile.alnInfo.alignments[alignmentIndex].GetRefStart();
      stringstream sstrm;
      sstrm << alnHoleNumber << "/" << qStart << "_" << cmpFile.alnInfo.alignments[alignmentIndex].GetQueryEnd();
      alignment.qName = sstrm.str();
      StickPrintAlignment(alignment, qAlignedSeq, rAlignedSeq, cout, qStart, tStart);
				
    }
  }
}
Exemple #3
0
/*
 * Return the value that follows the option at argv[argi], advancing argi onto
 * that value.  If the option is the last argument, print the usage text and
 * an error, then exit with status 1, so a null argv[argc] never reaches
 * atoi/atof.
 */
static const char* RequireOptionValue(int argc, char* argv[], int &argi) {
    if (argi + 1 >= argc) {
        PrintUsage();
        cout << "Missing value for option: " << argv[argi] << endl;
        exit(1);
    }
    return argv[++argi];
}

/*
 * Align query and target sequences pairwise with SDPAlign and print one CSV
 * row per pair to stdout.
 *
 * Positional arguments: query file, target file, tuple size.
 * Options:
 *   -indelRate F   indel rate passed to SDPAlign (default 0.25)
 *   -indel N       ins/del value of the score function (default 3)
 *   -sdpIndel N    value passed twice to SDPAlign after the tuple size
 *                  (default 3)
 *   -sdpIns N, -sdpDel N
 *                  parsed but currently unused (see the note below)
 *   -match N       diagonal value written into LocalAlignLowMutationMatrix
 *   -local         use the Local alignment type (the default is Global)
 *   -noRefine      pass 0 instead of 1 for SDPAlign's refineAlignments flag
 *   -showalign     also print each SDP alignment as a stick alignment
 *   -printsw       also run SWAlign on each pair and print its alignment
 *   -fixedtarget   read one target and align every query against it
 *
 * Exits with status 1 on a usage error; returns 0 otherwise.
 */
int main(int argc, char* argv[]) {
    if (argc < 4) {
        PrintUsage();
        exit(1);
    }

    string queryName, targetName;
    queryName = argv[1];
    targetName = argv[2];
    TupleMetrics tm;
    tm.Initialize(atoi(argv[3]));
    int argi = 4;
    float indelRate = 0.25;
    int indel = 3;
    int match = 0;
    int printSW = 0;
    int refineAlignments = 1;
    int showalign = 0;
    int fixedTarget = 0;
    // Taken from the default indel, so a later -indel does not change it.
    int sdpIndel = indel;
    // NOTE(review): -sdpIns and -sdpDel are accepted, but the SDPAlign call
    // below passes sdpIndel in both slots, so these two values have no
    // effect.  Confirm SDPAlign's parameter order before wiring them in.
    int sdpIns = 5;
    int sdpDel = 5;
    AlignmentType alignType = Global;
    while (argi < argc) {
        if (strcmp(argv[argi], "-indelRate") == 0) {
            indelRate = atof(RequireOptionValue(argc, argv, argi));
        }
        else if (strcmp(argv[argi], "-printsw") == 0) {
            printSW = 1;
        }
        else if (strcmp(argv[argi], "-noRefine") == 0) {
            refineAlignments = 0;
        }
        else if (strcmp(argv[argi], "-indel") == 0) {
            indel = atoi(RequireOptionValue(argc, argv, argi));
        }
        else if (strcmp(argv[argi], "-sdpIndel") == 0) {
            sdpIndel = atoi(RequireOptionValue(argc, argv, argi));
        }
        else if (strcmp(argv[argi], "-sdpIns") == 0) {
            sdpIns = atoi(RequireOptionValue(argc, argv, argi));
        }
        else if (strcmp(argv[argi], "-sdpDel") == 0) {
            sdpDel = atoi(RequireOptionValue(argc, argv, argi));
        }
        else if (strcmp(argv[argi], "-showalign") == 0) {
            showalign = 1;
        }
        else if (strcmp(argv[argi], "-local") == 0) {
            alignType = Local;
        }
        else if (strcmp(argv[argi], "-match") == 0) {
            match = atoi(RequireOptionValue(argc, argv, argi));
        }
        else if (strcmp(argv[argi], "-fixedtarget") == 0) {
            fixedTarget = 1;
        }
        else {
            PrintUsage();
            cout << "Bad option: " << argv[argi] << endl;
            exit(1);
        }
        ++argi;
    }

    FASTASequence query, target;
    FASTAReader queryReader, targetReader;
    queryReader.Init(queryName);
    targetReader.Init(targetName);

    // NOTE(review): the score function below is initialised from
    // SMRTDistanceMatrix; confirm that LocalAlignLowMutationMatrix is
    // actually consulted during alignment.
    if (match != 0) {
        for (int i = 0; i < 4; i++) {
            LocalAlignLowMutationMatrix[i][i] = match;
        }
    }

    Alignment alignment;
    DistanceMatrixScoreFunction<DNASequence, DNASequence> distScoreFn;
    distScoreFn.del = indel;
    distScoreFn.ins = indel;
    distScoreFn.InitializeScoreMatrix(SMRTDistanceMatrix);

    // With a fixed target, read it once and reuse it for every query.
    if (fixedTarget) {
        targetReader.GetNext(target);
    }

    cout << "qid,tid,qstart,qend,qlen,tstart,tend,tlen,score" << endl;
    while (queryReader.GetNext(query) and
           (fixedTarget or targetReader.GetNext(target))) {

        if (query.length == 0 or target.length == 0)
            continue;

        // The alignment object is reused, so drop the previous pair's blocks.
        alignment.blocks.clear();

        int alignScore = SDPAlign(query, target,
                                  distScoreFn, tm.tupleSize,
                                  sdpIndel, sdpIndel, indelRate,
                                  alignment,
                                  alignType,
                                  refineAlignments,
                                  false,
                                  0);

        if (alignScore > 0){ // in rare cases the SDP returns positive.
            alignScore = 0;  // this makes it more like a true local alignment
        }

        if (showalign) {
            StickPrintAlignment(alignment, query, target, cout);
        }

        if (printSW) {
            MatchedAlignment swAlignment;
            vector<int> scoreMat;
            vector<Arrow> pathMat;
            SWAlign(query, target, scoreMat, pathMat, swAlignment, distScoreFn);
            StickPrintAlignment(swAlignment, query, target, cout);
        }

        cout << query.GetName()  << "," << target.GetName() << ","
             << alignment.qPos << "," << alignment.QEnd()   << ","
             << query.length  << "," << alignment.tPos << ","
             << alignment.TEnd()   << "," << target.length << ","
             << alignScore << endl;
    }

    return 0;
}
int main(int argc, char* argv[]) {
	if (argc < 3) {
		cout << "usage: samatcher queryfile targetfile" <<endl;
		exit(1);
	}

	string queryFileName, targetFileName;
	int minMatchLength = 5;
	int maxExpand      = 0;
	queryFileName  = argv[1];
	targetFileName = argv[2];
	int argi = 3;
	AnchorParameters anchorParams;
	
	while (argi < argc) {
		if (strcmp(argv[argi], "-minmatch") == 0) {
			anchorParams.minMatchLength = atoi(argv[++argi]);
		}
		else if (strcmp(argv[argi], "-maxexpand") == 0) {
			anchorParams.expand = atoi(argv[++argi]);
		}
		else {
			cout << "ERROR! Invalid argument: " << argv[argi]<< endl;
			exit(1);
		}
		++argi;
	}

	FASTQSequence query, target;
	FASTAReader queryReader, targetReader;
	queryReader.Init(queryFileName);
	targetReader.Init(targetFileName);


	while(1) {
		if (!queryReader.GetNext(query)) break;
		if (!targetReader.GetNext(target)) break;

		query.ToUpper();
		target.ToUpper();

		//
		// Build the suffix array on the target.
		//
		DNASuffixArray sarray;
		target.ToThreeBit();		
		vector<int> alphabet;
		sarray.InitThreeBitDNAAlphabet(alphabet);
		sarray.LarssonBuildSuffixArray(target.seq, target.length, alphabet);
		cout <<"done building suffix array." << endl;
		target.ToAscii();

		//
		// Find the list of anchors.
		//
	
		query.PrintSeq(cout);
		cout << "target: " << endl;
		target.PrintSeq(cout);
		MatchPosList matchPosList;
		int numKeysMatched;
		anchorParams.useLookupTable = false;
		numKeysMatched   = 
			MapReadToGenome(target, sarray, query, sarray.lookupPrefixLength,
											matchPosList, anchorParams);
	
		//
		// Now, convert the matchPosList to a set of fragments
		// that can be used in the sdp.
		//
		SortMatchPosList(matchPosList);
		vector<ChainedFragment> fragments;
		fragments.resize(matchPosList.size());
		VectorIndex i;
		for (i = 0; i < matchPosList.size(); i++) {
			fragments[i].x = matchPosList[i].t;
			fragments[i].y = matchPosList[i].q;
			fragments[i].length = fragments[i].weight = matchPosList[i].w;
			//		cout << fragments[i].x << " " << fragments[i].y << " " << fragments[i].weight << endl;
		}
		cout << "stored a total of: " << fragments.size() << " fragments." << endl;

		int maxFragmentChainLength;
		vector<DNALength> maxFragmentChain;
	
		maxFragmentChainLength = GlobalChain<ChainedFragment, BasicEndpoint<ChainedFragment> >(fragments, maxFragmentChain);
		MatchedAlignment alignment;		
		std::reverse(maxFragmentChain.begin(), maxFragmentChain.end());
		alignment.AllocateBlocks(maxFragmentChain.size());
		for (i = 0; i < maxFragmentChain.size(); i++) {
			alignment.blocks[i].qPos = fragments[maxFragmentChain[i]].y;
			alignment.blocks[i].tPos = fragments[maxFragmentChain[i]].x;
			alignment.blocks[i].length = fragments[maxFragmentChain[i]].length;
			cout << "( " << fragments[maxFragmentChain[i]].x << " "
					 << fragments[maxFragmentChain[i]].y << " "
					 << fragments[maxFragmentChain[i]].length << ") ";
		}
		cout << endl;
		alignment.tStart = alignment.qStart = 0;
		alignment.tPos = alignment.qPos = 0;
		StickPrintAlignment(alignment, query, target, cout);
	}
	return 0;
}