Example #1
0
int _tmain(int argc, _TCHAR* argv[])
{
    Tree *tree = new Tree();

    TreeNode *tn = tree->BuildTree();
    tree->PrintTree(tn, tree->InOrder);

//	tree->InvertBinaryTree(tn);
//	tree->PrintTree(tn, tree->InOrder);

    LevelOrderTraversal(tn);
		
}
Example #2
0
int main (int argc, char *argv[])
{
	int i;
	PlatformSupport* Plat = new PlatformSupport();
    ColumnComp* CC;
	Alignment* ALIGN;
	Tree* T;
	MultipleAlignment* MA;
	ProteinDomains* PROTS =NULL;
	MultiAlignRec* pssmAlignment;
	char outFileName[STR_LEN];
	strcpy(outFileName, "out");
    bool colChosen=false, alignChosen=false, treeChosen=false, maChosen=false, usingDomains=false, inputProvided=false, scoresProvided=false;
	bool neuralTree=false; bool testing=false;bool testingAcc=false; bool testingTree=false; bool famNames=false; bool treeClusts=false; bool printTreeClusts=false;
	bool ma_off=false;
	bool tree_loocv=false;//true;
	bool silent=false, htmlOutput=false; bool simMatching=false;
	bool weighting_on=false;
	int matchTopX = TOP_MATCH;

	char inputTFs[STR_LEN];
	char matchTFs[STR_LEN];
	char scoreDist[STR_LEN];
	char inputProteins[STR_LEN];

	//Misc option settings
	bool genRandMotifs=false;
	bool genRandScores=false;
	char randMatOut[STR_LEN];
	char scoresOut[STR_LEN];
	//Default alignment settings
	double gapOpen = DFLT_GAP_OPEN;
	double gapExtend = DFLT_GAP_EXTEND;
	bool overlapAlign = DFLT_OVLP_ALIGN;
	bool extendOverlap=false;
	bool FBP_on = false;
	bool preAlign=false;
    bool pairwiseOnly=false;
    bool forwardAlignOnly=false;
	bool ungapped=false;

	for(i=1; i<argc; i++){
		if(strcmp(argv[i], "-silent")==0)
			silent=true;
		if(strcmp(argv[i], "-html")==0)
			htmlOutput=true;
	}

	//Welcome message
	if(!silent && !htmlOutput){printf("\n\tSTAMP\n\tSimilarity, Tree-building, & Alignment of Motifs and Profiles\n\n\tShaun Mahony\n\tDepartment of Computational Biology\n\tUniversity of Pittsburgh\n\tVersion 1.0 (Winter 2006)\n\n");}

	if(argc ==1) //First and Foremost, the help option
	{	DisplayHelp();
	}else{

	for(i=1; i<argc; i++)
	{
		if(strcmp(argv[i], "-h")==0 || strcmp(argv[i], "?")==0) //First and Foremost, the help option
		{	DisplayHelp();
		}
		if(strcmp(argv[i], "-out")==0) //Output file (for trees & similarity matching)
		{	if(argv[i+1]!=NULL)
			{ strcpy(outFileName, argv[i+1]);}
		}
		if(strcmp(argv[i], "-genrand")==0) //Generate random motifs
		{	if(argv[i+1]!=NULL)
			{ strcpy(randMatOut, argv[i+1]);}
			genRandMotifs=true;
		}
		if(strcmp(argv[i], "-genscores")==0) //Generate simulation scores
		{	if(argv[i+1]!=NULL)
			{ strcpy(scoresOut, argv[i+1]);}
			genRandScores=true;
		}
		if((strcmp(argv[i], "-cc")) ==0)  //Choose a column comparison measure
		{
			if((strcmp(argv[i+1], "PCC"))==0 || (strcmp(argv[i+1], "pcc"))==0){
				CC = new PearsonCorrelation(); //Pearson's correllation coefficient
			}else if((strcmp(argv[i+1], "ALLR"))==0 || (strcmp(argv[i+1], "allr"))==0){
				CC = new ALLR(); //ALLR
			}else if((strcmp(argv[i+1], "ALLR_LL"))==0 || (strcmp(argv[i+1], "allr_ll"))==0){
				CC = new ALLR_LL(); //ALLR with lower limit
			}else if((strcmp(argv[i+1], "CS"))==0 || (strcmp(argv[i+1], "cs"))==0){
				CC = new ChiSq(); //Pearson's Chi Square
			}else if((strcmp(argv[i+1], "KL"))==0 || (strcmp(argv[i+1], "kl"))==0){
				CC = new KullbackLieber(); //Kullback-Lieber
			}else if((strcmp(argv[i+1], "SSD"))==0 || (strcmp(argv[i+1], "ssd"))==0){
				CC = new SumSqDiff(); //sum of squared difference
			}else{
				CC = new PearsonCorrelation(); //Default = PCC
			}
			colChosen=true;
		}
		//check for alignment settings
		if((strcmp(argv[i], "-go")) ==0){ //Gap Open
			if(argv[i+1]!=NULL)
			{	gapOpen=strtod(argv[i+1], NULL);}
		}
		if((strcmp(argv[i], "-ge")) ==0){ //Gap Extend
			if(argv[i+1]!=NULL)
			{	gapExtend=strtod(argv[i+1], NULL);}
		}
		if((strcmp(argv[i], "-overlapalign")) ==0){ //Only complete overlapping alignments
			overlapAlign = true; if(!silent && !htmlOutput){printf("Overlapping alignments only\n");}
		}if((strcmp(argv[i], "-nooverlapalign")) ==0){ //All overlapping alignments
			overlapAlign = false;
		}
		if((strcmp(argv[i], "-extendoverlap")) ==0){
			extendOverlap=true; if(!silent && !htmlOutput){printf("Extending the overlapping alignments\n");}
		}
        if((strcmp(argv[i], "-forwardonly")) ==0){ //Consider forward alignments only
            forwardAlignOnly = true;
            if(!silent && !htmlOutput){printf("Considering forward direction alignments only\n");}
        }
		if((strcmp(argv[i], "-printpairwise")) ==0){
			pairwiseOnly=true; if(!silent && !htmlOutput){printf("Printing pairwise scores only\n");}
		}
		if((strcmp(argv[i], "-FBP")) ==0){
			FBP_on=true; if(!silent && !htmlOutput){printf("Using FBP profiles\n");}
		}
		if((strcmp(argv[i], "-useweighting")) ==0){
			weighting_on=true; if(!silent && !htmlOutput){printf("Using weighting in FBP construction\n");}
		}
		if((strcmp(argv[i], "-prealigned")) ==0){
			preAlign=true; if(!silent && !htmlOutput){printf("Profiles are pre-aligned\n");}
		}

		//Input TF dataset name
		if((strcmp(argv[i], "-tf")) ==0)
		{	if(argv[i+1]!=NULL)
			{ strcpy(inputTFs, argv[i+1]);}
			inputProvided=true;
		}
		//Score distribution file   Make an auto function for this!!!!!!!
		if((strcmp(argv[i], "-sd")) ==0)
		{	if(argv[i+1]!=NULL)
			{ strcpy(scoreDist, argv[i+1]);}
			scoresProvided=true;
		}
		//Match input TFs against this dataset
		if((strcmp(argv[i], "-match")) ==0)
		{	if(argv[i+1]!=NULL)
			{ strcpy(matchTFs, argv[i+1]);}
			if(argv[i+2]!=NULL && strcmp(argv[i+2], "fams")==0){
				famNames=true;
			}
			simMatching=true;
		}
		if((strcmp(argv[i], "-match_top")) ==0){ //Report the top X matches
			if(argv[i+1]!=NULL)
			{	matchTopX=strtol(argv[i+1], NULL, 10);}
		}
		//Matching input protein (Pfam) alignment dataset name
		if((strcmp(argv[i], "-prot")) ==0)
		{	if(argv[i+1]!=NULL)
			{ strcpy(inputProteins, argv[i+1]);}
			usingDomains = true;
		}
		//Run some tests
		if((strcmp(argv[i], "-test")) ==0)
		{	testing=true;
		}
		//Run some different tests
		if((strcmp(argv[i], "-testacc")) ==0)
		{	testingAcc=true;
			famNames=true;
		}
		//Run some tests with trees
		if((strcmp(argv[i], "-testtree")) ==0)
		{	testingTree=true;
			famNames=true;
		}//Run Calinski & Harabasz with trees
		if((strcmp(argv[i], "-ch")) ==0)
		{	testingTree=true; treeClusts=true;
		}//Run Calinski & Harabasz with trees and print the resulting clusters
		if((strcmp(argv[i], "-chp")) ==0)
		{	testingTree=true;
			printTreeClusts=true; treeClusts=true;
		}
	}
	//Defaults
	if(!colChosen)
	{	CC = new PearsonCorrelation();}

	//Second Pass
	for(i=1; i<argc; i++)
	{
		if((strcmp(argv[i], "-align")) ==0)  //Choose an alignment method
		{
            if((strcmp(argv[i+1], "NW"))==0 || (strcmp(argv[i+1], "nw"))==0){
                ALIGN = new NeedlemanWunsch(CC, gapOpen, gapExtend, overlapAlign, extendOverlap, forwardAlignOnly);
            }
            if((strcmp(argv[i+1], "SWU"))==0 || (strcmp(argv[i+1], "swu"))==0){
                ALIGN = new SmithWatermanUngappedExtended(CC,forwardAlignOnly); ungapped=true;
            }
            if((strcmp(argv[i+1], "SWA"))==0 || (strcmp(argv[i+1], "swa"))==0){
                ALIGN = new SmithWatermanAffine(CC, gapOpen, gapExtend, overlapAlign, extendOverlap,forwardAlignOnly);
            }
            if((strcmp(argv[i+1], "SW"))==0 || (strcmp(argv[i+1], "sw"))==0){
                ALIGN = new SmithWaterman(CC, gapOpen, gapExtend, overlapAlign, extendOverlap,forwardAlignOnly);
            }
            alignChosen = true;
		}
		//Choose a multiple alignment method
		if((strcmp(argv[i], "-ma")) ==0)
		{
			if((strcmp(argv[i+1], "PPA"))==0 || (strcmp(argv[i+1], "ppa"))==0){
				MA = new ProgressiveProfileAlignment(outFileName, htmlOutput);
				maChosen=true;
			}
			if((strcmp(argv[i+1], "IR"))==0 || (strcmp(argv[i+1], "ir"))==0){
				MA = new IterativeRefinementAlignment(outFileName, htmlOutput);
				maChosen=true;
			}
			if((strcmp(argv[i+1], "NONE"))==0 || (strcmp(argv[i+1], "none"))==0){
				maChosen=true; ma_off=true;
			}
		}
	}
	if(!alignChosen)
	{	ALIGN = new SmithWatermanAffine(CC, gapOpen, gapExtend, overlapAlign, extendOverlap);
	}
	if(!maChosen)
		MA = new ProgressiveProfileAlignment(outFileName, htmlOutput);
	//Third pass
	//Choose a tree-construction method
	for(i=1; i<argc; i++)
	{	if((strcmp(argv[i], "-tree")) ==0)
		{
			if((strcmp(argv[i+1], "UPGMA"))==0 || (strcmp(argv[i+1], "upgma"))==0){
				T = new UPGMA(ALIGN);
			}
			if((strcmp(argv[i+1], "SOTA"))==0 || (strcmp(argv[i+1], "sota"))==0){
				T = new SOTA(ALIGN, MA); neuralTree=true;
			}
			if((strcmp(argv[i+1], "NJ"))==0 || (strcmp(argv[i+1], "nj"))==0){
				T = new Neighbourjoin(ALIGN); printf("Using Neighbour-joining... ensure that the distance metric is additive\n");
			}
			if((strcmp(argv[i+1], "TDHC"))==0 || (strcmp(argv[i+1], "tdhc"))==0){
				T = new TopDownHClust(ALIGN, MA); neuralTree=true;
			}
			treeChosen=true;
		}
	}
	if(!treeChosen)
		T = new UPGMA(ALIGN);
	T->BeQuiet(silent);

////////////////////////////////////////////////////////////////////////////////////
//////// Main Program /////////////////////////////////////////////////////////////

	//Initialise the background
	Plat->ReadBackground();
	if(inputProvided){
		//Read in the matrices
		Plat->ReadTransfacFile(inputTFs, famNames,true, weighting_on);
		if(!silent && !htmlOutput){
			printf("MatCount: %d\n", Plat->GetMatCount());
			if(ungapped)
				printf("Ungapped Alignment\n");
			else
				printf("Gap open = %.3lf, gap extend = %.3lf\n", gapOpen, gapExtend);
		}
	}else{
		printf("No input motifs provided!\n\n");
	}
	if(genRandMotifs){
		//Generate some random matrices
		RandPSSMGen* RPG = new RandPSSMGen(Plat->inputMotifs, Plat->GetMatCount(), 10000, randMatOut);
		RPG->RunGenerator();
	}
	if(genRandScores){
		//Find rand dist
		Plat->GetRandDistrib(scoresOut, ALIGN);
	}else if(!scoresProvided){
		printf("No score distribution file provided!\n\n");
	}
	if(testing){
		PlatformTesting* PT = new PlatformTesting(CC);
		//Print the distribution of column depth
	//	PT->ColumnDepthDist(Plat->inputMotifs, Plat->GetMatCount());
		//Print the similarities of all columns against all others
	//	PT->ColumnScoreDist(Plat->inputMotifs, Plat->GetMatCount(), 0.05);
		double z;
		for(z=0.25; z<0.8; z+=0.05)
			PT->RandColumns(Plat, z);
		for(z=0.8; z<=1.0; z+=0.01)
			PT->RandColumns(Plat, z);
		delete(PT);
	}

	if(scoresProvided || preAlign){

		Plat->ReadScoreDists(scoreDist);
		if(!silent && !htmlOutput){printf("Scores read\n");}
		if(Plat->GetMatCount()>1){
			if(preAlign){
				//No alignments or trees built here
				pssmAlignment = MA->PreAlignedInput(Plat);
			}else{
				//Multiple alignment procedure
				Plat->PreAlign(ALIGN);
				if(pairwiseOnly){
					if(!silent && !htmlOutput){printf("\nPairwise alignment scores:\n");}
					Plat->PrintPairwise();
				}if(!ma_off){
					MA->ImportBasics(Plat, ALIGN);
					if(!silent && !htmlOutput){printf("Alignments Finished\n");}
					if(!testingAcc){
						if(tree_loocv && testingTree){
							T->LOOCVBuildTree(Plat, testingTree);
						}else{
							if(testingTree && !silent && !htmlOutput){printf("Calinski & Harabasz:\n\tNumClust\tC&H_Metric\n");}
							T->BuildTree(Plat, testingTree);
							if(!silent && treeClusts){printf("The Calinski & Harabasz statistic suggests %.0lf clusters in the input motifs\n", T->GetNodesMinCH());}
							if(printTreeClusts){
								T->PrintLevel(outFileName, int(T->GetNodesMinCH()));
							}
						}
						T->PrintTree(outFileName);

						if(!silent && !htmlOutput){printf("Tree Built\n");}

						if(!silent){
							if(!silent && !htmlOutput){printf("Multiple Alignment:\n");}
							pssmAlignment = MA->BuildAlignment(Plat, ALIGN, T);
						}
					}
				}
			}

			//Experiment with the Protein Domains
			if(usingDomains){
				PROTS = new ProteinDomains();
				PROTS->ReadDomains(inputProteins, Plat->inputMotifs, Plat->GetMatCount());
				PROTS->MutualInformation(pssmAlignment, MA->Alignment2Profile(pssmAlignment, "AlignmentMotif"), Plat->inputMotifs, Plat->GetMatCount());
				delete PROTS;
			}
		}
		//Similarity match against the database
		if(simMatching){
			Plat->ReadTransfacFile(matchTFs, famNames, false, false);
			Plat->SimilarityMatching(ALIGN, outFileName, famNames, matchTopX);
		}
	}

	if(testingAcc && scoresProvided && inputProvided && Plat->GetMatCount()>1){
		PlatformTesting* PT = new PlatformTesting(CC);
		PT->PairwisePredictionAccuracy(Plat);
	}

	delete(MA);
	delete(T);
	delete(CC);
	delete(ALIGN);
	}
delete(Plat);
return(0);
}