int _tmain(int argc, _TCHAR* argv[]) { Tree *tree = new Tree(); TreeNode *tn = tree->BuildTree(); tree->PrintTree(tn, tree->InOrder); // tree->InvertBinaryTree(tn); // tree->PrintTree(tn, tree->InOrder); LevelOrderTraversal(tn); }
int main (int argc, char *argv[]) { int i; PlatformSupport* Plat = new PlatformSupport(); ColumnComp* CC; Alignment* ALIGN; Tree* T; MultipleAlignment* MA; ProteinDomains* PROTS =NULL; MultiAlignRec* pssmAlignment; char outFileName[STR_LEN]; strcpy(outFileName, "out"); bool colChosen=false, alignChosen=false, treeChosen=false, maChosen=false, usingDomains=false, inputProvided=false, scoresProvided=false; bool neuralTree=false; bool testing=false;bool testingAcc=false; bool testingTree=false; bool famNames=false; bool treeClusts=false; bool printTreeClusts=false; bool ma_off=false; bool tree_loocv=false;//true; bool silent=false, htmlOutput=false; bool simMatching=false; bool weighting_on=false; int matchTopX = TOP_MATCH; char inputTFs[STR_LEN]; char matchTFs[STR_LEN]; char scoreDist[STR_LEN]; char inputProteins[STR_LEN]; //Misc option settings bool genRandMotifs=false; bool genRandScores=false; char randMatOut[STR_LEN]; char scoresOut[STR_LEN]; //Default alignment settings double gapOpen = DFLT_GAP_OPEN; double gapExtend = DFLT_GAP_EXTEND; bool overlapAlign = DFLT_OVLP_ALIGN; bool extendOverlap=false; bool FBP_on = false; bool preAlign=false; bool pairwiseOnly=false; bool forwardAlignOnly=false; bool ungapped=false; for(i=1; i<argc; i++){ if(strcmp(argv[i], "-silent")==0) silent=true; if(strcmp(argv[i], "-html")==0) htmlOutput=true; } //Welcome message if(!silent && !htmlOutput){printf("\n\tSTAMP\n\tSimilarity, Tree-building, & Alignment of Motifs and Profiles\n\n\tShaun Mahony\n\tDepartment of Computational Biology\n\tUniversity of Pittsburgh\n\tVersion 1.0 (Winter 2006)\n\n");} if(argc ==1) //First and Foremost, the help option { DisplayHelp(); }else{ for(i=1; i<argc; i++) { if(strcmp(argv[i], "-h")==0 || strcmp(argv[i], "?")==0) //First and Foremost, the help option { DisplayHelp(); } if(strcmp(argv[i], "-out")==0) //Output file (for trees & similarity matching) { if(argv[i+1]!=NULL) { strcpy(outFileName, argv[i+1]);} } if(strcmp(argv[i], "-genrand")==0) //Generate random motifs { if(argv[i+1]!=NULL) { strcpy(randMatOut, argv[i+1]);} genRandMotifs=true; } if(strcmp(argv[i], "-genscores")==0) //Generate simulation scores { if(argv[i+1]!=NULL) { strcpy(scoresOut, argv[i+1]);} genRandScores=true; } if((strcmp(argv[i], "-cc")) ==0) //Choose a column comparison measure { if((strcmp(argv[i+1], "PCC"))==0 || (strcmp(argv[i+1], "pcc"))==0){ CC = new PearsonCorrelation(); //Pearson's correllation coefficient }else if((strcmp(argv[i+1], "ALLR"))==0 || (strcmp(argv[i+1], "allr"))==0){ CC = new ALLR(); //ALLR }else if((strcmp(argv[i+1], "ALLR_LL"))==0 || (strcmp(argv[i+1], "allr_ll"))==0){ CC = new ALLR_LL(); //ALLR with lower limit }else if((strcmp(argv[i+1], "CS"))==0 || (strcmp(argv[i+1], "cs"))==0){ CC = new ChiSq(); //Pearson's Chi Square }else if((strcmp(argv[i+1], "KL"))==0 || (strcmp(argv[i+1], "kl"))==0){ CC = new KullbackLieber(); //Kullback-Lieber }else if((strcmp(argv[i+1], "SSD"))==0 || (strcmp(argv[i+1], "ssd"))==0){ CC = new SumSqDiff(); //sum of squared difference }else{ CC = new PearsonCorrelation(); //Default = PCC } colChosen=true; } //check for alignment settings if((strcmp(argv[i], "-go")) ==0){ //Gap Open if(argv[i+1]!=NULL) { gapOpen=strtod(argv[i+1], NULL);} } if((strcmp(argv[i], "-ge")) ==0){ //Gap Extend if(argv[i+1]!=NULL) { gapExtend=strtod(argv[i+1], NULL);} } if((strcmp(argv[i], "-overlapalign")) ==0){ //Only complete overlapping alignments overlapAlign = true; if(!silent && !htmlOutput){printf("Overlapping alignments only\n");} }if((strcmp(argv[i], "-nooverlapalign")) ==0){ //All overlapping alignments overlapAlign = false; } if((strcmp(argv[i], "-extendoverlap")) ==0){ extendOverlap=true; if(!silent && !htmlOutput){printf("Extending the overlapping alignments\n");} } if((strcmp(argv[i], "-forwardonly")) ==0){ //Consider forward alignments only forwardAlignOnly = true; if(!silent && !htmlOutput){printf("Considering forward direction alignments only\n");} } if((strcmp(argv[i], "-printpairwise")) ==0){ pairwiseOnly=true; if(!silent && !htmlOutput){printf("Printing pairwise scores only\n");} } if((strcmp(argv[i], "-FBP")) ==0){ FBP_on=true; if(!silent && !htmlOutput){printf("Using FBP profiles\n");} } if((strcmp(argv[i], "-useweighting")) ==0){ weighting_on=true; if(!silent && !htmlOutput){printf("Using weighting in FBP construction\n");} } if((strcmp(argv[i], "-prealigned")) ==0){ preAlign=true; if(!silent && !htmlOutput){printf("Profiles are pre-aligned\n");} } //Input TF dataset name if((strcmp(argv[i], "-tf")) ==0) { if(argv[i+1]!=NULL) { strcpy(inputTFs, argv[i+1]);} inputProvided=true; } //Score distribution file Make an auto function for this!!!!!!! if((strcmp(argv[i], "-sd")) ==0) { if(argv[i+1]!=NULL) { strcpy(scoreDist, argv[i+1]);} scoresProvided=true; } //Match input TFs against this dataset if((strcmp(argv[i], "-match")) ==0) { if(argv[i+1]!=NULL) { strcpy(matchTFs, argv[i+1]);} if(argv[i+2]!=NULL && strcmp(argv[i+2], "fams")==0){ famNames=true; } simMatching=true; } if((strcmp(argv[i], "-match_top")) ==0){ //Report the top X matches if(argv[i+1]!=NULL) { matchTopX=strtol(argv[i+1], NULL, 10);} } //Matching input protein (Pfam) alignment dataset name if((strcmp(argv[i], "-prot")) ==0) { if(argv[i+1]!=NULL) { strcpy(inputProteins, argv[i+1]);} usingDomains = true; } //Run some tests if((strcmp(argv[i], "-test")) ==0) { testing=true; } //Run some different tests if((strcmp(argv[i], "-testacc")) ==0) { testingAcc=true; famNames=true; } //Run some tests with trees if((strcmp(argv[i], "-testtree")) ==0) { testingTree=true; famNames=true; }//Run Calinski & Harabasz with trees if((strcmp(argv[i], "-ch")) ==0) { testingTree=true; treeClusts=true; }//Run Calinski & Harabasz with trees and print the resulting clusters if((strcmp(argv[i], "-chp")) ==0) { testingTree=true; printTreeClusts=true; treeClusts=true; } } //Defaults if(!colChosen) { CC = new PearsonCorrelation();} //Second Pass for(i=1; i<argc; i++) { if((strcmp(argv[i], "-align")) ==0) //Choose an alignment method { if((strcmp(argv[i+1], "NW"))==0 || (strcmp(argv[i+1], "nw"))==0){ ALIGN = new NeedlemanWunsch(CC, gapOpen, gapExtend, overlapAlign, extendOverlap, forwardAlignOnly); } if((strcmp(argv[i+1], "SWU"))==0 || (strcmp(argv[i+1], "swu"))==0){ ALIGN = new SmithWatermanUngappedExtended(CC,forwardAlignOnly); ungapped=true; } if((strcmp(argv[i+1], "SWA"))==0 || (strcmp(argv[i+1], "swa"))==0){ ALIGN = new SmithWatermanAffine(CC, gapOpen, gapExtend, overlapAlign, extendOverlap,forwardAlignOnly); } if((strcmp(argv[i+1], "SW"))==0 || (strcmp(argv[i+1], "sw"))==0){ ALIGN = new SmithWaterman(CC, gapOpen, gapExtend, overlapAlign, extendOverlap,forwardAlignOnly); } alignChosen = true; } //Choose a multiple alignment method if((strcmp(argv[i], "-ma")) ==0) { if((strcmp(argv[i+1], "PPA"))==0 || (strcmp(argv[i+1], "ppa"))==0){ MA = new ProgressiveProfileAlignment(outFileName, htmlOutput); maChosen=true; } if((strcmp(argv[i+1], "IR"))==0 || (strcmp(argv[i+1], "ir"))==0){ MA = new IterativeRefinementAlignment(outFileName, htmlOutput); maChosen=true; } if((strcmp(argv[i+1], "NONE"))==0 || (strcmp(argv[i+1], "none"))==0){ maChosen=true; ma_off=true; } } } if(!alignChosen) { ALIGN = new SmithWatermanAffine(CC, gapOpen, gapExtend, overlapAlign, extendOverlap); } if(!maChosen) MA = new ProgressiveProfileAlignment(outFileName, htmlOutput); //Third pass //Choose a tree-construction method for(i=1; i<argc; i++) { if((strcmp(argv[i], "-tree")) ==0) { if((strcmp(argv[i+1], "UPGMA"))==0 || (strcmp(argv[i+1], "upgma"))==0){ T = new UPGMA(ALIGN); } if((strcmp(argv[i+1], "SOTA"))==0 || (strcmp(argv[i+1], "sota"))==0){ T = new SOTA(ALIGN, MA); neuralTree=true; } if((strcmp(argv[i+1], "NJ"))==0 || (strcmp(argv[i+1], "nj"))==0){ T = new Neighbourjoin(ALIGN); printf("Using Neighbour-joining... ensure that the distance metric is additive\n"); } if((strcmp(argv[i+1], "TDHC"))==0 || (strcmp(argv[i+1], "tdhc"))==0){ T = new TopDownHClust(ALIGN, MA); neuralTree=true; } treeChosen=true; } } if(!treeChosen) T = new UPGMA(ALIGN); T->BeQuiet(silent); //////////////////////////////////////////////////////////////////////////////////// //////// Main Program ///////////////////////////////////////////////////////////// //Initialise the background Plat->ReadBackground(); if(inputProvided){ //Read in the matrices Plat->ReadTransfacFile(inputTFs, famNames,true, weighting_on); if(!silent && !htmlOutput){ printf("MatCount: %d\n", Plat->GetMatCount()); if(ungapped) printf("Ungapped Alignment\n"); else printf("Gap open = %.3lf, gap extend = %.3lf\n", gapOpen, gapExtend); } }else{ printf("No input motifs provided!\n\n"); } if(genRandMotifs){ //Generate some random matrices RandPSSMGen* RPG = new RandPSSMGen(Plat->inputMotifs, Plat->GetMatCount(), 10000, randMatOut); RPG->RunGenerator(); } if(genRandScores){ //Find rand dist Plat->GetRandDistrib(scoresOut, ALIGN); }else if(!scoresProvided){ printf("No score distribution file provided!\n\n"); } if(testing){ PlatformTesting* PT = new PlatformTesting(CC); //Print the distribution of column depth // PT->ColumnDepthDist(Plat->inputMotifs, Plat->GetMatCount()); //Print the similarities of all columns against all others // PT->ColumnScoreDist(Plat->inputMotifs, Plat->GetMatCount(), 0.05); double z; for(z=0.25; z<0.8; z+=0.05) PT->RandColumns(Plat, z); for(z=0.8; z<=1.0; z+=0.01) PT->RandColumns(Plat, z); delete(PT); } if(scoresProvided || preAlign){ Plat->ReadScoreDists(scoreDist); if(!silent && !htmlOutput){printf("Scores read\n");} if(Plat->GetMatCount()>1){ if(preAlign){ //No alignments or trees built here pssmAlignment = MA->PreAlignedInput(Plat); }else{ //Multiple alignment procedure Plat->PreAlign(ALIGN); if(pairwiseOnly){ if(!silent && !htmlOutput){printf("\nPairwise alignment scores:\n");} Plat->PrintPairwise(); }if(!ma_off){ MA->ImportBasics(Plat, ALIGN); if(!silent && !htmlOutput){printf("Alignments Finished\n");} if(!testingAcc){ if(tree_loocv && testingTree){ T->LOOCVBuildTree(Plat, testingTree); }else{ if(testingTree && !silent && !htmlOutput){printf("Calinski & Harabasz:\n\tNumClust\tC&H_Metric\n");} T->BuildTree(Plat, testingTree); if(!silent && treeClusts){printf("The Calinski & Harabasz statistic suggests %.0lf clusters in the input motifs\n", T->GetNodesMinCH());} if(printTreeClusts){ T->PrintLevel(outFileName, int(T->GetNodesMinCH())); } } T->PrintTree(outFileName); if(!silent && !htmlOutput){printf("Tree Built\n");} if(!silent){ if(!silent && !htmlOutput){printf("Multiple Alignment:\n");} pssmAlignment = MA->BuildAlignment(Plat, ALIGN, T); } } } } //Experiment with the Protein Domains if(usingDomains){ PROTS = new ProteinDomains(); PROTS->ReadDomains(inputProteins, Plat->inputMotifs, Plat->GetMatCount()); PROTS->MutualInformation(pssmAlignment, MA->Alignment2Profile(pssmAlignment, "AlignmentMotif"), Plat->inputMotifs, Plat->GetMatCount()); delete PROTS; } } //Similarity match against the database if(simMatching){ Plat->ReadTransfacFile(matchTFs, famNames, false, false); Plat->SimilarityMatching(ALIGN, outFileName, famNames, matchTopX); } } if(testingAcc && scoresProvided && inputProvided && Plat->GetMatCount()>1){ PlatformTesting* PT = new PlatformTesting(CC); PT->PairwisePredictionAccuracy(Plat); } delete(MA); delete(T); delete(CC); delete(ALIGN); } delete(Plat); return(0); }