C++ (Cpp) MultipleAlignment Examples

Programming Language: C++ (Cpp)

Examples at hotexamples.com: 6

C++ (Cpp) MultipleAlignment - 6 examples found. These are the top-rated real-world C++ (Cpp) examples of MultipleAlignment, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

isBlockAligned(2)

print(2)

Alignment2Profile(1)

BuildAlignment(1)

ImportBasics(1)

PreAlignedInput(1)

addBaseSequence(1)

addOverlap(1)

getNumColumns(1)

getNumRows(1)

getSymbol(1)

getSymbolCountVector(1)

Example #1

Show file

File: DindelUtil.cpp Project: BioinformaticsArchive/sga

// Debug helper that prints one multiple alignment per haplotype.
//
// Each haplotype in turn is used as the base ("root") sequence. Every
// haplotype (including the root itself, labelled in upper case) and every
// read is overlapped against that root and added to the alignment, which is
// then printed.
//
// dReads     - reads to align; the first half is labelled "read-<idx>(<id>)",
//              the second half "MATE read-<idx>".
// haplotypes - candidate haplotype sequences; must be non-empty.
void DindelUtil::doMultipleReadHaplotypeAlignment(const std::vector<DindelRead> & dReads,
                                                  const StringVector & haplotypes)
{
    assert(haplotypes.size()>0);

    // Align everything against each haplotype in turn (not just the first one).
    for (size_t h = 0; h < haplotypes.size(); ++h)
    {
        std::cout << "ALIGNING EVERYTHING AGAINST HAPLOTYPE " << h << "\n";
        MultipleAlignment ma;

        // Bind by reference: the haplotype outlives this iteration, no copy needed.
        const std::string& rootSequence = haplotypes[h];
        ma.addBaseSequence("root", rootSequence, "");

        for(size_t j = 0; j < haplotypes.size(); j++)
        {
            // The root haplotype is distinguished from the others by case only.
            std::stringstream ss;
            if (j!=h)
                ss << "haplotype-" << j;
            else
                ss << "HAPLOTYPE-" << j;
            SequenceOverlap o = Overlapper::computeOverlap(rootSequence, haplotypes[j]);
            ma.addOverlap(ss.str(), haplotypes[j], "", o);
        }

        for(size_t r = 0; r < dReads.size(); ++r)
        {
            std::stringstream ss;
            if (r<dReads.size()/2)
                ss << "read-" << r << "("  << dReads[r].getID() << ")";
            else
                ss << "MATE read-" << r;

            SequenceOverlap o = Overlapper::computeOverlap(rootSequence, dReads[r].getSequence());
            ma.addOverlap(ss.str(), dReads[r].getSequence(), "", o);
        }
        ma.print(100000);
    }
}

Example #2

Show file

File: OverlapHaplotypeBuilder.cpp Project: nilesh-iiita/MBB-Bio-Roll

bool OverlapHaplotypeBuilder::buildInitialGraph(const StringVector& reads)
{
    PROFILE_FUNC("OverlapHaplotypeBuilder::buildInitialGraph")
    // Compute initial ordering of reads based on the position of the
    // starting kmer sequence. If the starting kmer was corrected out
    // of a read, it is discarded.
    StringVector ordered_reads;
    orderReadsInitial(m_initial_kmer_string, reads, &ordered_reads);

    if(ordered_reads.size() < m_parameters.minDiscoveryCount)
        return false;

#ifdef SHOW_MULTIPLE_ALIGNMENT
    //DEBUG print MA
    MultipleAlignment ma = buildMultipleAlignment(ordered_reads);
    ma.print(200);
#endif
    // Insert initial reads into graph
    for(size_t i = 0; i < ordered_reads.size(); ++i)
        insertVertexIntoGraph("seed-", ordered_reads[i]);
    return true;
}

Example #3

Show file

File: cuCD.cpp Project: DmitrySigaev/ncbi

int IntersectByMaster(CCdCore* ccd, double rowFraction) {

    int result = -1;
    unsigned int masterLen = (ccd) ? ccd->GetSequenceStringByRow(0).length() : 0;
    if (masterLen == 0) return result;

    int slaveStart;
    int nAlignedIBM = 0;
    unsigned int i, j, nBlocks;
    unsigned int nRows = ccd->GetNumRows();

    //  If there is already a consistent block model, do nothing.
    MultipleAlignment* ma = new MultipleAlignment(ccd);
    if (ma && ma->isBlockAligned()) {
        delete ma;
        return 0;
    }
    delete ma;


    BlockIntersector blockIntersector(masterLen);
    BlockModel* intersectedBlockModel;
    //BlockModel* simpleIntersectedBlockModel;
    BlockModelPair* bmp;
    vector<BlockModelPair*> blockModelPairs;
    set<int> forcedCTerminiInIntersection;

    list< CRef< CSeq_align > >& cdSeqAligns = ccd->GetSeqAligns();
    list< CRef< CSeq_align > >::iterator cdSeqAlignIt = cdSeqAligns.begin(), cdSeqAlignEnd = cdSeqAligns.end();

    for (i = 0; cdSeqAlignIt != cdSeqAlignEnd; ++cdSeqAlignIt, ++i) {
        bmp = new BlockModelPair(*cdSeqAlignIt);

        //  We assume # of blocks and all block lengths are same on master and slave.
        if (bmp && bmp->isValid()) {

            blockModelPairs.push_back(bmp);
            blockIntersector.addOneAlignment(bmp->getMaster());

            //  Find places the intersection can't merge blocks (i.e., where there are
            //  gaps in the slave across a block boundary, but not in the master).
            BlockModel& slave = bmp->getSlave();
            nBlocks = slave.getBlocks().size();
            for (j = 0; j < nBlocks - 1; ++j) {  //  '-1' as I don't care about end of the C-terminal block
                if (slave.getGapToCTerminal(j) > 0 && bmp->getMaster().getGapToCTerminal(j) == 0) {
                    forcedCTerminiInIntersection.insert(bmp->getMaster().getBlock(j).getEnd());
                }
            }
        }
    }

    //  There was a problem creating one of the BlockModelPair objects from a seq_align,
    //  or one or more seq_align was invalid.
    if (blockModelPairs.size() != cdSeqAligns.size()) {
        return result;
    }

    //simpleIntersectedBlockModel = blockIntersector.getIntersectedAlignment(forcedCTerminiInIntersection);
    intersectedBlockModel = blockIntersector.getIntersectedAlignment(forcedCTerminiInIntersection, rowFraction);
    nAlignedIBM = (intersectedBlockModel) ? intersectedBlockModel->getTotalBlockLength() : 0;
    if (nAlignedIBM == 0) {
        return result;
    }

/*
    string testStr, testStr2;
    string sint = intersectedBlockModel->toString();
    string sintsimple = simpleIntersectedBlockModel->toString();
    delete simpleIntersectedBlockModel;
    cout << "rowFraction = 1:\n" << sintsimple << endl;
    cout << "rowFraction = " << rowFraction << ":\n" << sint << endl;
*/

    //  As we have case where every block model isn't identical,
    //  change each seq-align to reflect the common set of aligned columns.
    nBlocks = intersectedBlockModel->getBlocks().size();
    for (i = 0, cdSeqAlignIt = cdSeqAligns.begin(); i < nRows - 1 ; ++i, ++cdSeqAlignIt) {

        bmp = blockModelPairs[i];  //BlockModelPair seqAlignPair(*cdSeqAlignIt);
        BlockModel* intersectedSeqAlignSlave = new BlockModel(bmp->getSlave().getSeqId(), false);

        bmp->reverse();
        for (j = 0; j < nBlocks; ++j) {
            const Block& jthMasterBlock = intersectedBlockModel->getBlock(j);
            slaveStart = bmp->mapToMaster(jthMasterBlock.getStart());

            //  since we're dealing w/ an intersection, slaveStart should always be valid
            assert(slaveStart != -1);

            Block b(slaveStart, jthMasterBlock.getLen(), jthMasterBlock.getId());
            intersectedSeqAlignSlave->addBlock(b);
        }
        *cdSeqAlignIt = intersectedSeqAlignSlave->toSeqAlign(*intersectedBlockModel);
        //testStr = intersectedSeqAlignSlave->toString();
        //testStr2 = bmp->getMaster().toString();  // original *slave* alignment

        delete bmp;
    }
    blockModelPairs.clear();
    result = nBlocks;

    delete intersectedBlockModel;

    return result;
}

Example #4

Show file

File: cuCD.cpp Project: DmitrySigaev/ncbi

//  Extracts the aligned columns of a CD as strings, one character per row.
//
//  cd           - the CD to read; if NULL, 'columns' is simply left empty.
//  columns      - output; cleared on entry.  An empty map on return signals a
//                 problem.  The key is the position of the column on the
//                 reference row, or the plain column number when
//                 'referenceRow' is out of range.
//  referenceRow - row whose coordinates are used as map keys.  A value that is
//                 >= the number of rows selects column-number keys instead
//                 (row 0's seq-align is then used for the block structure).
//
//  The CD must have a consistent block structure; otherwise an error is
//  posted and 'columns' is left empty.
void GetAlignmentColumnsForCD(CCdCore* cd, map<unsigned int, string>& columns, unsigned int referenceRow)
{
    bool isOK = true, useRefRow = true;
    int j;
    unsigned int i, col, row, pos, mapIndex, nRows, nCols, nBlocks;
    char** alignedResidues = NULL;
    string colString;
    //  Map column number to position on the selected reference row.
    map<unsigned int, unsigned int> colToPos;
    vector<int> starts, lengths;
    CRef< CSeq_align > seqAlign;

    //  Empty the columns map first, as this is used as a way to flag problems.
    columns.clear();

    if (!cd) return;

    //  Check if the block structure is consistent.
    //  'ma' is declared outside the try so the handler can release it if
    //  isBlockAligned() throws.
    MultipleAlignment* ma = NULL;
    try {
        ma = new MultipleAlignment(cd);
        if (!ma) {
            ERR_POST("Creation of MultipleAlignment object failed for CD " << cd->GetAccession() << ".");
            return;
        }
        bool blockAligned = ma->isBlockAligned();
        delete ma;
        ma = NULL;
        if (!blockAligned) {
            ERR_POST("CD " << cd->GetAccession() << " must have a consistent block structure for column extraction.");
            return;
        }
    } catch (...) {
        //  The block structure could not be verified, so do not go on to
        //  extract columns: leave 'columns' empty to flag the failure.
        delete ma;
        ERR_POST("Could not extract columns for CD " << cd->GetAccession());
        return;
    }

    nCols = cd->GetAlignmentLength();
    nRows = cd->GetNumRows();

    //  Get a reference seq-align for mapping between alignment rows.
    //  If the columns map index will simply be the column count, use the master, row 0.
    if (referenceRow >= nRows) {
        useRefRow = false;
        referenceRow = 0;
    }
    if (! cd->GetSeqAlign(referenceRow, seqAlign)) {
        isOK = false;
    }

    //  Initialize the column # -> reference row position mapping.
    //  If useRefRow is true, use the indicated row's coordinates as the position.
    //  Otherwise, use the column number as the position.
    if (isOK && GetBlockStarts(seqAlign, starts, (referenceRow == 0)) > 0 && GetBlockLengths(seqAlign, lengths) > 0) {
        nBlocks = starts.size();
        if (nBlocks == lengths.size()) {
            for (i = 0, col = 0; i < nBlocks; ++i) {
                pos = (useRefRow) ? starts[i] : col;
                for (j = 0; j < lengths[i]; ++j, ++col, ++pos) {
                    //  Not explicitly checking if 'pos' is aligned since above
                    //  we confirmed the CD has a valid block model.
                    colToPos[col] = pos;
                }
            }
        } else {
            isOK = false;
        }
    } else {
        isOK = false;
    }

    //  Allocated unconditionally (as before); released in the clean-up below.
    SetAlignedResiduesForCD(cd, alignedResidues, true);

    //  Construct the columns as string objects.
    if (isOK && alignedResidues) {
        for (col = 0; col < nCols; ++col) {
            colString.erase();
            for (row = 0; row < nRows; ++row) {
                colString += alignedResidues[row][col];
            }
            mapIndex = colToPos[col];
            columns[mapIndex] = colString;
        }
    }

    //  Clean up array of characters.
    if (alignedResidues) {
        for (row = 0; row < nRows; ++row) {
            delete [] alignedResidues[row];
        }
        delete [] alignedResidues;
    }

}

Example #5

Show file

File: main.cpp Project: razZ0r/STAMP

//  Returns true when 'arg' is non-NULL and equals one of the two accepted spellings.
static bool StampArgIs(const char* arg, const char* upper, const char* lower)
{
	return arg != NULL && (strcmp(arg, upper) == 0 || strcmp(arg, lower) == 0);
}

//  Copies 'src' into 'dest', a buffer of STR_LEN chars.  Over-long input is
//  truncated (with a warning on stderr) instead of overrunning the buffer.
static void StampCopyArg(char* dest, const char* src)
{
	if(strlen(src) >= static_cast<size_t>(STR_LEN))
		fprintf(stderr, "Warning: argument \"%s\" is too long and was truncated\n", src);
	strncpy(dest, src, STR_LEN - 1);
	dest[STR_LEN - 1] = '\0';
}

//  STAMP command-line entry point.
//
//  The command line is scanned several times because later choices depend on
//  earlier ones:
//    pre-pass  : -silent / -html (needed before anything is printed)
//    1st pass  : general options, file names and the column comparison (-cc)
//    2nd pass  : pairwise aligner (-align, needs CC) and multiple aligner (-ma)
//    3rd pass  : tree builder (-tree, needs ALIGN and MA)
//  An option value that is missing or not recognised falls back to the default
//  for that option.  Always returns 0.
int main (int argc, char *argv[])
{
	int i;
	PlatformSupport* Plat = new PlatformSupport();
	//  All strategy objects start out NULL so that "never assigned" is detectable
	//  and the deletes at the end are always well defined.
	ColumnComp* CC = NULL;
	Alignment* ALIGN = NULL;
	Tree* T = NULL;
	MultipleAlignment* MA = NULL;
	ProteinDomains* PROTS = NULL;
	MultiAlignRec* pssmAlignment = NULL;
	char outFileName[STR_LEN];
	StampCopyArg(outFileName, "out");
	bool colChosen=false, alignChosen=false, treeChosen=false, maChosen=false, usingDomains=false, inputProvided=false, scoresProvided=false;
	bool neuralTree=false; bool testing=false;bool testingAcc=false; bool testingTree=false; bool famNames=false; bool treeClusts=false; bool printTreeClusts=false;
	bool ma_off=false;
	bool tree_loocv=false;//true;
	bool silent=false, htmlOutput=false; bool simMatching=false;
	bool weighting_on=false;
	int matchTopX = TOP_MATCH;

	//  File-name buffers start empty so they are never read uninitialised
	//  (e.g. -tf given as the last argument, or -prealigned without -sd).
	char inputTFs[STR_LEN] = "";
	char matchTFs[STR_LEN] = "";
	char scoreDist[STR_LEN] = "";
	char inputProteins[STR_LEN] = "";

	//Misc option settings
	bool genRandMotifs=false;
	bool genRandScores=false;
	char randMatOut[STR_LEN] = "";
	char scoresOut[STR_LEN] = "";
	//Default alignment settings
	double gapOpen = DFLT_GAP_OPEN;
	double gapExtend = DFLT_GAP_EXTEND;
	bool overlapAlign = DFLT_OVLP_ALIGN;
	bool extendOverlap=false;
	bool FBP_on = false;
	bool preAlign=false;
	bool pairwiseOnly=false;
	bool forwardAlignOnly=false;
	bool ungapped=false;

	for(i=1; i<argc; i++){
		if(strcmp(argv[i], "-silent")==0)
			silent=true;
		if(strcmp(argv[i], "-html")==0)
			htmlOutput=true;
	}

	//Welcome message
	if(!silent && !htmlOutput){printf("\n\tSTAMP\n\tSimilarity, Tree-building, & Alignment of Motifs and Profiles\n\n\tShaun Mahony\n\tDepartment of Computational Biology\n\tUniversity of Pittsburgh\n\tVersion 1.0 (Winter 2006)\n\n");}

	if(argc ==1) //First and Foremost, the help option
	{	DisplayHelp();
	}else{

	for(i=1; i<argc; i++)
	{
		//  argv[argc] is guaranteed to be NULL, so this read is always in range;
		//  'next' is NULL when the option is the last argument.
		const char* next = argv[i+1];

		if(strcmp(argv[i], "-h")==0 || strcmp(argv[i], "?")==0) //First and Foremost, the help option
		{	DisplayHelp();
		}
		if(strcmp(argv[i], "-out")==0) //Output file (for trees & similarity matching)
		{	if(next!=NULL)
			{ StampCopyArg(outFileName, next);}
		}
		if(strcmp(argv[i], "-genrand")==0) //Generate random motifs
		{	if(next!=NULL)
			{ StampCopyArg(randMatOut, next);}
			genRandMotifs=true;
		}
		if(strcmp(argv[i], "-genscores")==0) //Generate simulation scores
		{	if(next!=NULL)
			{ StampCopyArg(scoresOut, next);}
			genRandScores=true;
		}
		if((strcmp(argv[i], "-cc")) ==0)  //Choose a column comparison measure
		{
			if(StampArgIs(next, "PCC", "pcc")){
				CC = new PearsonCorrelation(); //Pearson's correllation coefficient
			}else if(StampArgIs(next, "ALLR", "allr")){
				CC = new ALLR(); //ALLR
			}else if(StampArgIs(next, "ALLR_LL", "allr_ll")){
				CC = new ALLR_LL(); //ALLR with lower limit
			}else if(StampArgIs(next, "CS", "cs")){
				CC = new ChiSq(); //Pearson's Chi Square
			}else if(StampArgIs(next, "KL", "kl")){
				CC = new KullbackLieber(); //Kullback-Lieber
			}else if(StampArgIs(next, "SSD", "ssd")){
				CC = new SumSqDiff(); //sum of squared difference
			}else{
				CC = new PearsonCorrelation(); //Default = PCC (also for a missing value)
			}
			colChosen=true;
		}
		//check for alignment settings
		if((strcmp(argv[i], "-go")) ==0){ //Gap Open
			if(next!=NULL)
			{	gapOpen=strtod(next, NULL);}
		}
		if((strcmp(argv[i], "-ge")) ==0){ //Gap Extend
			if(next!=NULL)
			{	gapExtend=strtod(next, NULL);}
		}
		if((strcmp(argv[i], "-overlapalign")) ==0){ //Only complete overlapping alignments
			overlapAlign = true; if(!silent && !htmlOutput){printf("Overlapping alignments only\n");}
		}if((strcmp(argv[i], "-nooverlapalign")) ==0){ //All overlapping alignments
			overlapAlign = false;
		}
		if((strcmp(argv[i], "-extendoverlap")) ==0){
			extendOverlap=true; if(!silent && !htmlOutput){printf("Extending the overlapping alignments\n");}
		}
		if((strcmp(argv[i], "-forwardonly")) ==0){ //Consider forward alignments only
			forwardAlignOnly = true;
			if(!silent && !htmlOutput){printf("Considering forward direction alignments only\n");}
		}
		if((strcmp(argv[i], "-printpairwise")) ==0){
			pairwiseOnly=true; if(!silent && !htmlOutput){printf("Printing pairwise scores only\n");}
		}
		if((strcmp(argv[i], "-FBP")) ==0){
			FBP_on=true; if(!silent && !htmlOutput){printf("Using FBP profiles\n");}
		}
		if((strcmp(argv[i], "-useweighting")) ==0){
			weighting_on=true; if(!silent && !htmlOutput){printf("Using weighting in FBP construction\n");}
		}
		if((strcmp(argv[i], "-prealigned")) ==0){
			preAlign=true; if(!silent && !htmlOutput){printf("Profiles are pre-aligned\n");}
		}

		//Input TF dataset name
		if((strcmp(argv[i], "-tf")) ==0)
		{	if(next!=NULL)
			{ StampCopyArg(inputTFs, next);}
			inputProvided=true;
		}
		//Score distribution file   Make an auto function for this!!!!!!!
		if((strcmp(argv[i], "-sd")) ==0)
		{	if(next!=NULL)
			{ StampCopyArg(scoreDist, next);}
			scoresProvided=true;
		}
		//Match input TFs against this dataset
		if((strcmp(argv[i], "-match")) ==0)
		{	if(next!=NULL)
			{	StampCopyArg(matchTFs, next);
				//  Only look at argv[i+2] when argv[i+1] exists; otherwise
				//  i+2 would be past the terminating NULL entry.
				if(argv[i+2]!=NULL && strcmp(argv[i+2], "fams")==0){
					famNames=true;
				}
			}
			simMatching=true;
		}
		if((strcmp(argv[i], "-match_top")) ==0){ //Report the top X matches
			if(next!=NULL)
			{	matchTopX=strtol(next, NULL, 10);}
		}
		//Matching input protein (Pfam) alignment dataset name
		if((strcmp(argv[i], "-prot")) ==0)
		{	if(next!=NULL)
			{ StampCopyArg(inputProteins, next);}
			usingDomains = true;
		}
		//Run some tests
		if((strcmp(argv[i], "-test")) ==0)
		{	testing=true;
		}
		//Run some different tests
		if((strcmp(argv[i], "-testacc")) ==0)
		{	testingAcc=true;
			famNames=true;
		}
		//Run some tests with trees
		if((strcmp(argv[i], "-testtree")) ==0)
		{	testingTree=true;
			famNames=true;
		}//Run Calinski & Harabasz with trees
		if((strcmp(argv[i], "-ch")) ==0)
		{	testingTree=true; treeClusts=true;
		}//Run Calinski & Harabasz with trees and print the resulting clusters
		if((strcmp(argv[i], "-chp")) ==0)
		{	testingTree=true;
			printTreeClusts=true; treeClusts=true;
		}
	}
	//Defaults
	if(!colChosen)
	{	CC = new PearsonCorrelation();}

	//Second Pass
	for(i=1; i<argc; i++)
	{
		const char* next = argv[i+1];

		if((strcmp(argv[i], "-align")) ==0)  //Choose an alignment method
		{
			//  alignChosen is set only when a method was actually recognised, so an
			//  unknown or missing value falls back to the default aligner below.
			if(StampArgIs(next, "NW", "nw")){
				ALIGN = new NeedlemanWunsch(CC, gapOpen, gapExtend, overlapAlign, extendOverlap, forwardAlignOnly);
				alignChosen = true;
			}else if(StampArgIs(next, "SWU", "swu")){
				ALIGN = new SmithWatermanUngappedExtended(CC,forwardAlignOnly); ungapped=true;
				alignChosen = true;
			}else if(StampArgIs(next, "SWA", "swa")){
				ALIGN = new SmithWatermanAffine(CC, gapOpen, gapExtend, overlapAlign, extendOverlap,forwardAlignOnly);
				alignChosen = true;
			}else if(StampArgIs(next, "SW", "sw")){
				ALIGN = new SmithWaterman(CC, gapOpen, gapExtend, overlapAlign, extendOverlap,forwardAlignOnly);
				alignChosen = true;
			}
		}
		//Choose a multiple alignment method
		if((strcmp(argv[i], "-ma")) ==0)
		{
			if(StampArgIs(next, "PPA", "ppa")){
				MA = new ProgressiveProfileAlignment(outFileName, htmlOutput);
				maChosen=true;
			}else if(StampArgIs(next, "IR", "ir")){
				MA = new IterativeRefinementAlignment(outFileName, htmlOutput);
				maChosen=true;
			}else if(StampArgIs(next, "NONE", "none")){
				//  MA stays NULL in this mode; every later use is guarded.
				maChosen=true; ma_off=true;
			}
		}
	}
	if(!alignChosen)
	{	//  Pass forwardAlignOnly here too, so -forwardonly is honoured by the default aligner.
		ALIGN = new SmithWatermanAffine(CC, gapOpen, gapExtend, overlapAlign, extendOverlap, forwardAlignOnly);
	}
	if(!maChosen)
		MA = new ProgressiveProfileAlignment(outFileName, htmlOutput);
	//Third pass
	//Choose a tree-construction method
	for(i=1; i<argc; i++)
	{	if((strcmp(argv[i], "-tree")) ==0)
		{
			const char* next = argv[i+1];

			//  As with -align, an unknown or missing value falls back to the default.
			//  NOTE(review): with "-ma NONE", SOTA/TDHC receive a NULL MA -- confirm
			//  whether those tree builders tolerate that.
			if(StampArgIs(next, "UPGMA", "upgma")){
				T = new UPGMA(ALIGN);
				treeChosen=true;
			}else if(StampArgIs(next, "SOTA", "sota")){
				T = new SOTA(ALIGN, MA); neuralTree=true;
				treeChosen=true;
			}else if(StampArgIs(next, "NJ", "nj")){
				T = new Neighbourjoin(ALIGN); printf("Using Neighbour-joining... ensure that the distance metric is additive\n");
				treeChosen=true;
			}else if(StampArgIs(next, "TDHC", "tdhc")){
				T = new TopDownHClust(ALIGN, MA); neuralTree=true;
				treeChosen=true;
			}
		}
	}
	if(!treeChosen)
		T = new UPGMA(ALIGN);
	T->BeQuiet(silent);

////////////////////////////////////////////////////////////////////////////////////
//////// Main Program /////////////////////////////////////////////////////////////

	//Initialise the background
	Plat->ReadBackground();
	if(inputProvided){
		//Read in the matrices
		Plat->ReadTransfacFile(inputTFs, famNames,true, weighting_on);
		if(!silent && !htmlOutput){
			printf("MatCount: %d\n", Plat->GetMatCount());
			if(ungapped)
				printf("Ungapped Alignment\n");
			else
				printf("Gap open = %.3lf, gap extend = %.3lf\n", gapOpen, gapExtend);
		}
	}else{
		printf("No input motifs provided!\n\n");
	}
	if(genRandMotifs){
		//Generate some random matrices
		//  NOTE(review): RPG is never deleted (unchanged from the original);
		//  confirm RandPSSMGen's destructor is safe to run before adding a delete.
		RandPSSMGen* RPG = new RandPSSMGen(Plat->inputMotifs, Plat->GetMatCount(), 10000, randMatOut);
		RPG->RunGenerator();
	}
	if(genRandScores){
		//Find rand dist
		Plat->GetRandDistrib(scoresOut, ALIGN);
	}else if(!scoresProvided){
		printf("No score distribution file provided!\n\n");
	}
	if(testing){
		PlatformTesting* PT = new PlatformTesting(CC);
		//Print the distribution of column depth
	//	PT->ColumnDepthDist(Plat->inputMotifs, Plat->GetMatCount());
		//Print the similarities of all columns against all others
	//	PT->ColumnScoreDist(Plat->inputMotifs, Plat->GetMatCount(), 0.05);
		double z;
		for(z=0.25; z<0.8; z+=0.05)
			PT->RandColumns(Plat, z);
		for(z=0.8; z<=1.0; z+=0.01)
			PT->RandColumns(Plat, z);
		delete(PT);
	}

	if(scoresProvided || preAlign){

		Plat->ReadScoreDists(scoreDist);
		if(!silent && !htmlOutput){printf("Scores read\n");}
		if(Plat->GetMatCount()>1){
			if(preAlign){
				//No alignments or trees built here
				if(MA!=NULL){
					pssmAlignment = MA->PreAlignedInput(Plat);
				}else{
					fprintf(stderr, "Pre-aligned input requires a multiple alignment method; skipping\n");
				}
			}else{
				//Multiple alignment procedure
				Plat->PreAlign(ALIGN);
				if(pairwiseOnly){
					if(!silent && !htmlOutput){printf("\nPairwise alignment scores:\n");}
					Plat->PrintPairwise();
				}if(!ma_off && MA!=NULL){
					MA->ImportBasics(Plat, ALIGN);
					if(!silent && !htmlOutput){printf("Alignments Finished\n");}
					if(!testingAcc){
						if(tree_loocv && testingTree){
							T->LOOCVBuildTree(Plat, testingTree);
						}else{
							if(testingTree && !silent && !htmlOutput){printf("Calinski & Harabasz:\n\tNumClust\tC&H_Metric\n");}
							T->BuildTree(Plat, testingTree);
							if(!silent && treeClusts){printf("The Calinski & Harabasz statistic suggests %.0lf clusters in the input motifs\n", T->GetNodesMinCH());}
							if(printTreeClusts){
								T->PrintLevel(outFileName, int(T->GetNodesMinCH()));
							}
						}
						T->PrintTree(outFileName);

						if(!silent && !htmlOutput){printf("Tree Built\n");}

						if(!silent){
							if(!silent && !htmlOutput){printf("Multiple Alignment:\n");}
							pssmAlignment = MA->BuildAlignment(Plat, ALIGN, T);
						}
					}
				}
			}

			//Experiment with the Protein Domains
			if(usingDomains){
				//  The domain analysis needs a multiple alignment; it is not built in
				//  every mode (e.g. -silent, -testacc or "-ma NONE").
				if(MA!=NULL && pssmAlignment!=NULL){
					PROTS = new ProteinDomains();
					PROTS->ReadDomains(inputProteins, Plat->inputMotifs, Plat->GetMatCount());
					PROTS->MutualInformation(pssmAlignment, MA->Alignment2Profile(pssmAlignment, "AlignmentMotif"), Plat->inputMotifs, Plat->GetMatCount());
					delete PROTS;
				}else{
					fprintf(stderr, "No multiple alignment available; skipping protein domain analysis\n");
				}
			}
		}
		//Similarity match against the database
		if(simMatching){
			Plat->ReadTransfacFile(matchTFs, famNames, false, false);
			Plat->SimilarityMatching(ALIGN, outFileName, famNames, matchTopX);
		}
	}

	if(testingAcc && scoresProvided && inputProvided && Plat->GetMatCount()>1){
		PlatformTesting* PT = new PlatformTesting(CC);
		PT->PairwisePredictionAccuracy(Plat);
		delete(PT);
	}

	//  MA may legitimately be NULL here ("-ma NONE"); deleting NULL is a no-op.
	delete(MA);
	delete(T);
	delete(CC);
	delete(ALIGN);
	}
delete(Plat);
return(0);
}

Example #6

Show file

File: preqc.cpp Project: richarddurbin/sga

// Estimates a per-position error profile and writes it as the JSON object
// "ErrorsPerBase" with two parallel arrays, "base_count" and "error_count".
//
// A fixed number of strings are sampled from the BWT. For each one a multiple
// alignment is built and walked column by column; columns where row 0 holds a
// gap ('-' or '\0') are skipped and do not advance the position counter.
// A position is counted as an error when the most frequent symbol of the
// column differs from the row-0 symbol, the row-0 symbol is seen fewer than 4
// times and the most frequent symbol at least 3 times.
//
// pWriter   - JSON writer receiving the result.
// index_set - index used for sampling and for building the alignments.
void generate_errors_per_base(JSONWriter* pWriter, const BWTIndexSet& index_set)
{

    int n_samples = 100000;
    size_t k = 25;

    double max_error_rate = 0.95;
    size_t min_overlap = 50;

    // Tallies indexed by (ungapped) position; grown on demand inside the
    // critical section below.
    std::vector<size_t> position_count;
    std::vector<size_t> error_count;

    Timer timer("test", true);
#if HAVE_OPENMP
        omp_set_num_threads(opt::numThreads);
        #pragma omp parallel for
#endif
    for(int sample = 0; sample < n_samples; ++sample)
    {
        std::string s = BWTAlgorithms::sampleRandomString(index_set.pBWT);
        // Result is discarded, exactly as in the original code.
        KmerOverlaps::retrieveMatches(s, k, min_overlap, max_error_rate, 2, index_set);

        MultipleAlignment ma =
            KmerOverlaps::buildMultipleAlignment(s, k, min_overlap, max_error_rate, 2, index_set);

        // A single row gives no depth to classify errors against.
        if(ma.getNumRows() < 2)
            continue;

        const size_t num_columns = ma.getNumColumns();
        size_t position = 0;
        for(size_t column = 0; column < num_columns; ++column)
        {
            const char read_symbol = ma.getSymbol(0, column);

            // Gap columns carry no base of the sampled string.
            const bool is_gap = (read_symbol == '-' || read_symbol == '\0');
            if(is_gap)
                continue;

            // Find the support for the read's own symbol and the most
            // frequent symbol of this column (first one wins on ties).
            SymbolCountVector counts = ma.getSymbolCountVector(column);
            int read_support = 0;
            char best_symbol = 0;
            int best_count = 0;

            for(size_t idx = 0; idx < counts.size(); ++idx)
            {
                if(counts[idx].symbol == read_symbol)
                {
                    read_support = counts[idx].count;
                }
                if(counts[idx].count > best_count)
                {
                    best_count = counts[idx].count;
                    best_symbol = counts[idx].symbol;
                }
            }

            // Error: well supported consensus that disagrees with a weakly
            // supported read symbol.
            const bool is_error =
                (best_count >= 3) && (read_support < 4) && (read_symbol != best_symbol);

#if HAVE_OPENMP
            #pragma omp critical
#endif
            {
                const size_t needed = position + 1;
                if(position_count.size() < needed)
                {
                    position_count.resize(needed);
                    error_count.resize(needed);
                }

                ++position_count[position];
                if(is_error)
                    ++error_count[position];
            }
            ++position;
        }
    }

    const size_t num_positions = position_count.size();

    pWriter->String("ErrorsPerBase");
    pWriter->StartObject();

    pWriter->String("base_count");
    pWriter->StartArray();
    for(size_t p = 0; p < num_positions; ++p)
    {
        pWriter->Int(position_count[p]);
    }
    pWriter->EndArray();

    pWriter->String("error_count");
    pWriter->StartArray();
    for(size_t p = 0; p < num_positions; ++p)
    {
        pWriter->Int(error_count[p]);
    }
    pWriter->EndArray();

    pWriter->EndObject();
}