// -------------- NewickToNexus ------------------ void NewickToNexus(phylo P) { //SWK - A Nexus wrapper takes a Newick file and makes it Mesquite readable time_t rawtime; int i; char tmp[24]; time ( &rawtime ); strncpy(tmp , ctime(&rawtime), 24); printf("#NEXUS\n[output from phylocom, written %s]\n\n", tmp ); printf("BEGIN TAXA;\n"); printf("TITLE Phylocom_Phylogeny_Taxa;\n"); // Needed for correct Mesquite grammar, but V1.1 busted! Will not read interior names correctly. printf("\tDIMENSIONS NTAX=%d;\n\tTAXLABELS\n\t", P.termtaxa); for (i = 0; i < P.nnodes; i++) { if (P.noat[i] == 0) printf(" %s", P.taxon[i]); } printf(";\nEND;\n\n"); printf("BEGIN TREES;\n"); printf("\tTITLE Phylocom_Phylogenies;\n\tLINK Taxa = Phylocom_Phylogeny_Taxa;\n"); // Ditto! //printf("\tTRANSLATE\n\t"); //for (i = 0; i < P.ntaxa; i++) printf(" %d %s;\n", i, P.taxon[P.t2n[i]]); printf("\tTREE %s = ", P.phyname); //Is there a way to exclude node labels? Might be useful... see fy2new.c Fy2newRec(P); printf("END;\n"); }
/*
 * ComnodeTipLists (private helper of Comnode)
 *
 * For every node i of tree `t`, fills comlist[i][0 .. comlistn[i]-1] with the
 * node numbers of the terminal nodes reachable from i (following t.down)
 * whose names also appear in other.taxalist[0 .. other.termtaxa-1].
 *
 * Side effect: every interior node of `t` (t.noat > 0) has its name
 * overwritten with "." so that default interior names, which are the same
 * in both trees, cannot be confused later on.
 */
static void ComnodeTipLists(phylo t, phylo other, int **comlist, int *comlistn)
{
  int i, j, xnode, depth, common;
  int maxdepth = 0;
  int *active_n;

  for (i = 0; i < t.nnodes; i++)
    {
      if (t.depth[i] > maxdepth) maxdepth = t.depth[i];
      // need to clear the node names - they could cause confusion when
      // bladjing, because the default names are the same for both trees
      if (t.noat[i] > 0) strcpy(t.taxon[i], "."); // unnamed later by caller
    }

  active_n = ivector(0, t.nnodes-1);
  // ivector() is not shown to zero its storage, and the flags are tested
  // before most of them are ever written, so clear them explicitly.
  for (i = 0; i < t.nnodes; i++) active_n[i] = 0;

  for (i = 0; i < t.nnodes; i++)
    {
      active_n[i] = 1;
      comlistn[i] = 0;

      // sweep level by level from the depth of node i down to the deepest level
      for (depth = t.depth[i]; depth <= maxdepth; depth++)
        {
          for (xnode = 0; xnode < t.nnodes; xnode++)
            {
              if ((t.depth[xnode] != depth) || (active_n[xnode] != 1)) continue;

              if (t.noat[xnode] == 0)
                {
                  // reached a tip: keep it only if the other tree has it too
                  common = 0;
                  for (j = 0; j < other.termtaxa; j++)
                    {
                      if (strcmp(t.taxon[xnode], other.taxalist[j]) == 0)
                        {
                          common = 1;
                          break;
                        }
                    }
                  if (common == 1)
                    {
                      comlist[i][comlistn[i]] = xnode;
                      comlistn[i]++;
                    }
                }
              else
                {
                  // interior node: hand the "active" flag on to its daughters
                  for (j = 0; j < t.noat[xnode]; j++)
                    {
                      active_n[t.down[xnode][j]] = 1;
                    }
                }
              active_n[xnode] = 0;
            }
        }
    }
  // NOTE(review): active_n is not released here; the matching deallocator
  // for ivector() is not visible in this part of the file - confirm and free.
}

/*
 * Comnode
 *
 * Finds the interior nodes of two trees that subtend exactly the same set of
 * shared terminal taxa (at least two), gives each matching pair of nodes the
 * same name ("match0", "match1", ...), blanks all other interior node names,
 * and prints both trees to stdout inside a NEXUS TREES block via Fy2newRec().
 *
 * t1, t2 : the two phylogenies. They are passed by value, but the node names
 *          reached through t1.taxon / t2.taxon are overwritten in place.
 *
 * If a node matches more than one node of the other tree, the last match
 * overrides the previous one (this will generally be correct for bladj).
 */
void Comnode(phylo t1, phylo t2)
{
  int i, j, p, q, matches1, matches2;
  int matchcode = 0;
  int *matched1;
  int *matched2;
  int **comlist1;  // contents: node number of a term taxon (use to lookup name)
                   // indexed by node, and then an index 0...
  int *comlist1n;  // the number of items in comlist1[node]
  int **comlist2;  // as comlist1, for tree 2
  int *comlist2n;  // the number of items in comlist2[node]
  char tmp[50];

  strcpy(t1.phyname, "Tree1");
  strcpy(t2.phyname, "Tree2");

  comlist1 = imatrix(0, t1.nnodes-1, 0, t1.termtaxa);
  comlist1n = ivector(0, t1.nnodes-1);
  comlist2 = imatrix(0, t2.nnodes-1, 0, t2.termtaxa);
  comlist2n = ivector(0, t2.nnodes-1);
  matched1 = ivector(0, t1.termtaxa-1);
  matched2 = ivector(0, t2.termtaxa-1);

  // make a full list of shared term taxa from each node, for each tree
  ComnodeTipLists(t1, t2, comlist1, comlist1n);
  ComnodeTipLists(t2, t1, comlist2, comlist2n);

  // Now, compare the nodes (once only for each pair)
  for (i = 0; i < t1.nnodes; i++)
    {
      // a valid match needs more than one shared tip on each side
      if (comlist1n[i] < 2) continue;

      for (j = 0; j < t2.nnodes; j++)
        {
          if (comlist2n[j] < 2) continue;

          // initialize:
          for (p = 0; p < comlist1n[i]; p++) matched1[p] = 0;
          for (q = 0; q < comlist2n[j]; q++) matched2[q] = 0;

          // here's the crux - each one must have the same sublist to be valid
          for (p = 0; p < comlist1n[i]; p++)
            {
              for (q = 0; q < comlist2n[j]; q++)
                {
                  if (strcmp(t1.taxon[comlist1[i][p]],
                             t2.taxon[comlist2[j][q]]) == 0)
                    {
                      matched1[p] = 1;
                      matched2[q] = 1;
                    }
                }
            }

          // check matches
          matches1 = 0;
          matches2 = 0;
          for (p = 0; p < comlist1n[i]; p++)
            {
              if (matched1[p] == 1) matches1++;
            }
          for (q = 0; q < comlist2n[j]; q++)
            {
              if (matched2[q] == 1) matches2++;
            }

          if ((matches1 == comlist1n[i]) && (matches2 == comlist2n[j]))
            {
              snprintf(tmp, sizeof tmp, "match%d", matchcode++);
              // Storing the code in the node notes would be more correct, but
              // we need to just use node names for r8s / bladj. Note that the
              // last match overrides the previous one.
              strcpy(t1.taxon[i], tmp);
              strcpy(t2.taxon[j], tmp);
            }
        }
    }

  // Unname the "." node names (a hack)
  for (i = 0; i < t1.nnodes; i++)
    {
      if (!strcmp(t1.taxon[i], ".")) strcpy(t1.taxon[i], "");
    }
  for (i = 0; i < t2.nnodes; i++)
    {
      if (!strcmp(t2.taxon[i], ".")) strcpy(t2.taxon[i], "");
    }

  printf("#NEXUS\n\nBEGIN TREES;\nTREE tree1 = ");
  Fy2newRec(t1);
  printf("TREE tree2 = ");
  Fy2newRec(t2);
  printf("END;\n");

  // NOTE(review): comlist1/2, comlist1n/2n and matched1/2 are never released;
  // the deallocators matching imatrix()/ivector() are not visible in this
  // part of the file - confirm their names and free the buffers here.
}
/*
 * Bladj
 *
 * Branch-length adjustment: fixes the ages of some nodes and lets Adjust()
 * work on the nodes lying between pairs of fixed-age nodes.
 *
 *  - terminal nodes (noat == 0) are fixed at age 0.0; interior nodes start
 *    unfixed with a placeholder age of 99999.9;
 *  - every "name age" line of the ages file INFILEA fixes the age of all
 *    nodes carrying that name; lines that do not hold both a name and a
 *    number are skipped;
 *  - for each node i, the later-numbered fixed nodes in line of sight of i
 *    are collected, ordered by SortAction() and passed to Adjust().
 *
 * The result is written with FyOut() when FYOUT is set, otherwise with
 * Fy2newRec(). If the ages file cannot be opened, a message is printed to
 * stderr and nothing is written.
 *
 * Intree : the phylogeny; Intree.age[] is overwritten.
 */
void Bladj(phylo Intree)
{
  int i, j, q, z, l;
  int *action;     // node numbers to adjust against the current node i
  int *AgeFixed;   // 1 if the age of the node is fixed, 0 otherwise
  char nameI[50];
  float ageI;
  char line[201];  // array of characters from input line
  int lineending;

  // Dimension things:
  action = ivector(0, Intree.nnodes - 1);
  AgeFixed = ivector(0, Intree.nnodes - 1);

  // Fix ages for nodes of terminal taxa in Phylogeny:
  for (i = 0; i < Intree.nnodes; i++)
    {
      if (Intree.noat[i] == 0)
        {
          // terminal nodes
          Intree.age[i] = 0.0;
          AgeFixed[i] = 1;
        }
      else
        {
          Intree.age[i] = 99999.9;
          AgeFixed[i] = 0;
        }
    }

  // pre-read
  lineending = whatnewline(INFILEA);

  // Fix node ages for nodes found in ages file
  Fa = fopen(INFILEA, "r");
  if (Fa == NULL)
    {
      fprintf(stderr, "Cannot open ages file %s\n", INFILEA);
      return;
    }
  while (myfgets(line, 200, Fa, lineending) != NULL)
    {
      // %49s keeps an over-long name from overrunning nameI; a line without
      // both fields would otherwise reuse the previous (or an uninitialized)
      // name/age, so skip it.
      if (sscanf(line, "%49s %f", nameI, &ageI) != 2) continue;

      for (z = 0; z < Intree.nnodes; z++)
        {
          if (strcmp(Intree.taxon[z], nameI) == 0)
            {
              Intree.age[z] = ageI;
              AgeFixed[z] = 1;
            }
        }
    }
  fclose(Fa);

  //TODO will crash if no name/age for deepest node, need to add check for this

  // The algorithm:
  // 1. create network of fixed age nodes between the root and the other
  //    fixed age nodes, choosing the order of nodes to operate on first
  //    using age, then number of intervening nodes.
  for (i = 0; i < Intree.nnodes; i++)
    {
      q = 0;
      for (j = i+1; j < Intree.nnodes; j++)
        {
          // find all the line-of-sight ages
          action[q] = 0;

          // correct for errors in ages: a node in line of sight of i that is
          // not younger than i loses its fixed status
          if ((LineOfSight(Intree, AgeFixed, i, j) == 1) &&
              (Intree.age[j] >= Intree.age[i]))
            AgeFixed[j] = 0;

          if ((AgeFixed[j] == 1) &&
              (LineOfSight(Intree, AgeFixed, i, j) == 1))
            {
              action[q] = j;
              q++;
            }
        }

      // Now sort the action
      SortAction(Intree, action, q, i);

      // Adjust lengths
      for (l = 0; l < q; l++)
        {
          Adjust(Intree, AgeFixed, i, action[l]);
        }
    }

  if (FYOUT) FyOut(Intree);
  else Fy2newRec(Intree);

  // NOTE(review): action and AgeFixed are never released; the deallocator
  // matching ivector() is not visible in this part of the file - confirm
  // its name and free both buffers here (and on the early return above).
}
// -------------- WriteNexus ------------------ void WriteNexus(phylo P[], int ntree, sample S, int nsamp, traits T, int ntrf) { // Mesquite style! time_t rawtime; int i, j, q, k, x, pass, present; int makedisc, makecont; float abnd; int nterm = 0; phylo WN[ntree]; char tmp[MAXTAXONLENGTH+10]; for (i = 0; i < ntree; i++) { WN[i] = P[i]; // inefficient to make copy so much, but need to // to create third dimension of taxon array // reassign the pointer to a new space - free this! WN[i].taxon = cmatrix(0, P[0].nnodes-1, 0, MAXTAXONLENGTH+10); } // determine number of terminal taxa - assume all trees contain same taxa for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) nterm++; } time ( &rawtime ); strncpy(tmp , ctime(&rawtime), 24); printf("#NEXUS\n[output from phylocom, written %s]\n\n", tmp ); printf("BEGIN TAXA;\n"); if (TreeView == 0) printf("TITLE Phylocom_Phylogeny_Taxa;\n"); // Needed for correct Mesquite grammar, but V1.1 busted! Will not read interior names correctly. printf("\tDIMENSIONS NTAX=%d;\n\tTAXLABELS\n\t", nterm); for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) printf(" %s", P[0].taxon[i]); } printf(";\nEND;\n\n"); if (nsamp > 0) { // Characters printf("BEGIN CHARACTERS;\n\tTITLE Phylocom_Presence_in_Sample;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = STANDARD GAP = - MISSING = ? 
SYMBOLS = \" 0 1\";\n", S.nsamples); printf("\tCHARSTATELABELS\n\t\t"); printf("%d %s", 1, S.pname[0]); for (i = 1; i < S.nsamples; i++) { printf(", %d %s", i+1, S.pname[i]); } printf(";\n\tMATRIX\n"); for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) { printf("\t%s\t" , P[0].taxon[i]); for (j = 0; j < S.nsamples; j++) { present = 0; for (k = 0; k < S.srec[j]; k++) { if (strcmp(S.taxa[S.id[j][k]], P[0].taxon[i]) == 0) present = 1; } printf("%d", present); } printf("\n"); } } printf(";\nEND;\n\n"); // Abundances as continuous printf("BEGIN CHARACTERS;\n\tTITLE Phylocom_Abundance_in_Sample;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;\n", S.nsamples); printf("\tCHARSTATELABELS\n\t\t"); printf("%d %s", 1, S.pname[0]); for (i = 1; i < S.nsamples; i++) { printf(", %d %s", i+1, S.pname[i]); } printf(";\n\tMATRIX\n"); for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) { printf("\t%s\t" , P[0].taxon[i]); for (j = 0; j < S.nsamples; j++) { abnd = 0.0; for (k = 0; k < S.srec[j]; k++) { if (strcmp(S.taxa[S.id[j][k]], P[0].taxon[i]) == 0) abnd = (float) S.abund[j][k]; } printf(" %f", abnd); } printf("\n"); } } printf(";\nEND;\n\n"); } if (ntrf > 0) { makedisc = 0; makecont = 0; pass = 0; for (i = 0; i < T.ntraits; i++) { if ((T.type[i] == 0) || (T.type[i] == 1) || (T.type[i] == 2)) makedisc++; if (T.type[i] == 3) makecont++; } if (makedisc > 0) { // Discrete Traits printf("BEGIN CHARACTERS;\n\tTITLE Phylocom_Discrete_Traits;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = STANDARD GAP = - MISSING = ? 
SYMBOLS = \" 0 1 2 3 4 5 6 7 8 9\";\n", makedisc); x = 1; printf("\tCHARSTATELABELS\n\t\t"); // first one for (i = 0; i < T.ntraits; i++) { if ((T.type[i] == 0) || (T.type[i] == 1) || (T.type[i] == 2)) { printf("%d %s", x, T.trname[i]); x++; pass = i; break; } } for (i = pass+1; i < T.ntraits; i++) { if ((T.type[i] == 0) || (T.type[i] == 1) || (T.type[i] == 2)) { printf(", %d %s", x, T.trname[i]); x++; } } printf(";\n\tMATRIX\n"); for (i = 0; i < T.ntaxa; i++) { printf("\t%s\t" , T.taxon[i]); for (j = 0; j < T.ntraits; j++) { if ((T.type[j] == 0) || (T.type[j] == 1) || (T.type[j] == 2)) { printf("%d", (int) T.tr[i][j]); } } printf("\n"); } printf(";\nEND;\n\n"); } if (makecont > 0) { // Continous Traits printf("BEGIN CHARACTERS;\n\tTITLE Phylocom_Continuous_Traits;\n\tDIMENSIONS NCHAR=%d;\n\tFORMAT DATATYPE = CONTINUOUS GAP = - MISSING = ?;\n", makecont); x=1; printf("\tCHARSTATELABELS\n\t\t"); // first one for (i = 0; i < T.ntraits; i++) { if (T.type[i] == 3) { printf("%d %s", x, T.trname[i]); x++; pass = i; break; } } for (i = pass+1; i < T.ntraits; i++) { if (T.type[i] == 3) { printf(", %d %s", x, T.trname[i]); x++; } } printf(";\n\tMATRIX\n"); for (i = 0; i < T.ntaxa; i++) { printf("\t%s\t" , T.taxon[i]); for (j = 0; j < T.ntraits; j++) { if (T.type[j] == 3) { printf(" %f", T.tr[i][j]); } } printf("\n"); } printf(";\nEND;\n\n"); } } printf("BEGIN TREES;\n"); if (TreeView == 0) printf("\tTITLE Phylocom_Phylogenies;\n\tLINK Taxa = Phylocom_Phylogeny_Taxa;\n"); // Ditto! 
printf("\tTRANSLATE\n\t"); for (q = 0; q < ntree; q++) { j = 0; for (i = 0; i < P[0].nnodes; i++) { if (P[0].noat[i] == 0) { j++; if (q == 0) { if (i == P[0].nnodes-1) printf(" %d %s;\n", j, P[0].taxon[i]); else printf(" %d %s,", j, P[0].taxon[i]); } sprintf(tmp, "%d", j); strcpy(WN[q].taxon[i], tmp); } else if ((strcmp(P[q].taxon[i], "") != 0) && \ (strcmp(P[q].taxon[i], ".") != 0)) { strcpy(WN[q].taxon[i], "'"); strcat(WN[q].taxon[i], P[q].taxon[i]); strcat(WN[q].taxon[i], "'"); } else strcpy(WN[q].taxon[i], ""); // test if (strcmp(WN[q].notes[i], "") != 0) printf("%s\n", WN[q].notes[i]); } } for (q = 0; q < ntree; q++) { printf("\tTREE %s = ", WN[q].phyname); Fy2newRec(WN[q]); free_cmatrix(WN[q].taxon, 0, P[0].nnodes-1, 0, MAXTAXONLENGTH+10); } printf("END;\n"); printf("\nBEGIN PHYLOCOM;\n\tTITLE Phylocom_Main;\n\tDATA\n"); for (i = 0; i < S.nsamples; i++) { for (j = 0; j < S.srec[i]; j++) { printf("%s\t%d\t%s\n", S.pname[i], S.abund[i][j], S.taxa[S.id[i][j]]); } } printf(";\nEND;\n"); //free_cmatrix(WN.taxon, 0, P.nnodes-1, 0, MAXTAXONLENGTH+10); }