/*
 * Recursively build the Newick-style text for the subtree rooted at 'tree'.
 *
 * The result is laid out as:
 *   - "(" + the children's strings joined by "," + ")", if there are children;
 *   - followed by the node label, if it is non-NULL;
 *   - followed by ":<length>" (formatted with %g), if the branch length is
 *     not INFINITY.
 * No terminating ';' is appended here.
 *
 * Returns a newly allocated string; the caller releases it with free().
 */
static char *tree_getNewickTreeStringP(stTree *tree) {
    char *cA;
    char *next;
    // 64-bit count/index, matching how the child count is indexed elsewhere.
    int64_t childNumber = stTree_getChildNumber(tree);

    if (childNumber > 0) {
        cA = stString_copy("(");
        for (int64_t i = 0; i < childNumber; i++) {
            char *childString = tree_getNewickTreeStringP(stTree_getChild(tree, i));
            // Every child but the last is followed by a comma.
            next = stString_print((i + 1 < childNumber ? "%s%s," : "%s%s"), cA, childString);
            free(cA);
            free(childString);
            cA = next;
        }
        next = stString_print("%s)", cA);
        free(cA);
        cA = next;
    } else {
        cA = stString_copy("");
    }

    if (stTree_getLabel(tree) != NULL) {
        next = stString_print("%s%s", cA, stTree_getLabel(tree));
        free(cA);
        cA = next;
    }

    if (stTree_getBranchLength(tree) != INFINITY) {
        next = stString_print("%s:%g", cA, stTree_getBranchLength(tree));
        free(cA);
        cA = next;
    }

    return cA;
}
// clone this node, make its supertree a child, and clone all children // other than oldNode, leaving this node as a child of nodeToAddTo static void tree_cloneFlippedTree(stTree *node, stTree *oldNode, stTree *nodeToAddTo, double branchLength) { if(stTree_getParent(node) != NULL) { // This node isn't the root stTree *clonedNode = stTree_cloneNode(node); stTree_setParent(clonedNode, nodeToAddTo); stTree_setBranchLength(clonedNode, branchLength); // Clone its children (other than oldNode) and their subtrees for(int64_t i = 0; i < stTree_getChildNumber(node); i++) { stTree *child = stTree_getChild(node, i); if(child != oldNode) { stTree *clonedChild = stTree_clone(child); stTree_setParent(clonedChild, clonedNode); } } // Recurse on the parent of this node. tree_cloneFlippedTree(stTree_getParent(node), node, clonedNode, stTree_getBranchLength(node)); } else { // We have to treat the root specially, because we're going to // eliminate it. Just add all the other children of the root // as children of nodeToAddTo. for(int64_t i = 0; i < stTree_getChildNumber(node); i++) { stTree *child = stTree_getChild(node, i); if(child != oldNode) { stTree *clonedChild = stTree_clone(child); stTree_setParent(clonedChild, nodeToAddTo); stTree_setBranchLength(clonedChild, stTree_getBranchLength(child) + branchLength); } } } }
static void assignEventsAndSequences(Event *parentEvent, stTree *tree, stSet *outgroupNameSet, char *argv[], int64_t *j) { Event *myEvent = NULL; // To distinguish from the global "event" variable. assert(tree != NULL); totalEventNumber++; if (stTree_getChildNumber(tree) > 0) { myEvent = event_construct3(stTree_getLabel(tree), stTree_getBranchLength(tree), parentEvent, eventTree); for (int64_t i = 0; i < stTree_getChildNumber(tree); i++) { assignEventsAndSequences(myEvent, stTree_getChild(tree, i), outgroupNameSet, argv, j); } } if (stTree_getChildNumber(tree) == 0 || (stTree_getLabel(tree) != NULL && (stSet_search(outgroupNameSet, (char *)stTree_getLabel(tree)) != NULL))) { // This event is a leaf and/or an outgroup, so it has // associated sequence. assert(stTree_getLabel(tree) != NULL); assert(stTree_getBranchLength(tree) != INFINITY); if (stTree_getChildNumber(tree) == 0) { // Construct the leaf event myEvent = event_construct3(stTree_getLabel(tree), stTree_getBranchLength(tree), parentEvent, eventTree); } char *fileName = argv[*j]; if (!stFile_exists(fileName)) { st_errAbort("File does not exist: %s\n", fileName); } // Set the global "event" variable, which is needed for the // function provided to fastaReadToFunction. event = myEvent; if (stFile_isDir(fileName)) { st_logInfo("Processing directory: %s\n", fileName); stList *filesInDir = stFile_getFileNamesInDirectory(fileName); for (int64_t i = 0; i < stList_length(filesInDir); i++) { char *absChildFileName = stFile_pathJoin(fileName, stList_get(filesInDir, i)); assert(stFile_exists(absChildFileName)); setCompleteStatus(absChildFileName); //decide if the sequences in the file should be free or attached. 
FILE *fileHandle = fopen(absChildFileName, "r"); fastaReadToFunction(fileHandle, processSequence); fclose(fileHandle); free(absChildFileName); } stList_destruct(filesInDir); } else { st_logInfo("Processing file: %s\n", fileName); setCompleteStatus(fileName); //decide if the sequences in the file should be free or attached. FILE *fileHandle = fopen(fileName, "r"); fastaReadToFunction(fileHandle, processSequence); fclose(fileHandle); } (*j)++; } }
/*
 * Recursively clone the children of 'srcParent'.
 *
 * Each child is passed to subrangeCloneNode(); the non-NULL results are
 * gathered in an on-stack array and then handed, together with their count,
 * to subrangeSavePendingChildren() along with 'pendingSubtrees'.
 */
static void subrangeCloneChildren(stTree *srcParent, struct malnCompCompMap *srcDestCompMap, stList *pendingSubtrees) {
    int64_t numSrcChildren = stTree_getChildNumber(srcParent);
    int numDestChildren = 0;
    // A variable-length array must have a size greater than zero, so reserve
    // at least one slot even when this node has no children.
    stTree *destChildren[numSrcChildren > 0 ? numSrcChildren : 1];

    for (int64_t i = 0; i < numSrcChildren; i++) {
        stTree *destNode = subrangeCloneNode(stTree_getChild(srcParent, i), srcDestCompMap, pendingSubtrees);
        if (destNode != NULL) {
            destChildren[numDestChildren++] = destNode;
        }
    }
    subrangeSavePendingChildren(numDestChildren, destChildren, pendingSubtrees);
}
/*
 * Count the nodes in the subtree rooted at 'root', including 'root' itself.
 */
int stTree_getNumNodes(stTree *root) {
    int total = 1; // the root of this subtree
    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(root)) {
        total += stTree_getNumNodes(stTree_getChild(root, childIdx));
        childIdx++;
    }
    return total;
}
/*
 * Recursively clone the subtree rooted at 'srcNode'.
 *
 * The copy of 'srcNode' is made with cloneNode() and attached to
 * 'destParent'; the copies of its descendants are attached below it.
 * Returns the copy of 'srcNode'.
 */
static stTree *cloneTree(stTree *srcNode, stTree *destParent, struct malnCompCompMap *srcDestCompMap) {
    stTree *copy = cloneNode(srcNode, srcDestCompMap);
    stTree_setParent(copy, destParent);

    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(srcNode)) {
        cloneTree(stTree_getChild(srcNode, childIdx), copy, srcDestCompMap);
        childIdx++;
    }
    return copy;
}
// Set client data to NULL (optionally recursively). static void stTree_clearClientData(stTree *tree, bool recursive) { stTree_setClientData(tree, NULL); if (recursive) { for (int64_t i = 0; i < stTree_getChildNumber(tree); i++) { stTree_clearClientData(stTree_getChild(tree, i), true); } } }
/*
 * Deep-copy the subtree rooted at 'node' and attach the copy to 'parent2'.
 * Returns the copy of 'node'.
 */
static stTree *tree_clonetree(stTree *node, stTree *parent2) {
    stTree *copy = stTree_cloneNode(node);
    stTree_setParent(copy, parent2);

    for (int childIdx = 0; childIdx < stTree_getChildNumber(node); childIdx++) {
        stTree *child = stTree_getChild(node, childIdx);
        tree_clonetree(child, copy);
    }
    return copy;
}
/*
 * Sort the children of every node in the subtree rooted at 'root' using
 * 'cmpFn'.
 *
 * The comparator is published through the file-level sortChildrenCmpFn
 * variable for the duration of each stList_sort() call and cleared again
 * before descending into the children.
 * NOTE(review): because of that shared variable this is presumably not safe
 * to call concurrently -- confirm.
 */
void stTree_sortChildren(stTree *root, int cmpFn(stTree *a, stTree *b)) {
    // Sort this node's own child list.
    sortChildrenCmpFn = cmpFn;
    stList_sort(root->nodes, sortChildrenListCmpFn);
    sortChildrenCmpFn = NULL;

    // Then sort within each child.
    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(root)) {
        stTree_sortChildren(stTree_getChild(root, childIdx), cmpFn);
        childIdx++;
    }
}
/*
 * Replace the label of every node in the subtree rooted at 'tree' with the
 * result of makeAlphaNumeric() applied to the current label.
 */
void makeEventHeadersAlphaNumericFn(stTree *tree) {
    char *sanitizedLabel = makeAlphaNumeric(stTree_getLabel(tree));
    stTree_setLabel(tree, sanitizedLabel);
    // The temporary is released immediately after it has been handed over.
    free(sanitizedLabel);

    int64_t childIdx = 0;
    while (childIdx < stTree_getChildNumber(tree)) {
        makeEventHeadersAlphaNumericFn(stTree_getChild(tree, childIdx));
        childIdx++;
    }
}
/*
 * Verify that every node in the subtree rooted at 'tree' has a finite-valued
 * (non-infinite) branch length.  On the first infinite length found,
 * st_errAbort() is called with the Newick string of the offending subtree.
 */
void checkBranchLengthsAreDefined(stTree *tree) {
    double length = stTree_getBranchLength(tree);
    if (isinf(length)) {
        st_errAbort("Got a non defined branch length in the input tree: %s.\n", stTree_getNewickTreeString(tree));
    }

    int64_t childIdx = 0;
    while (childIdx < stTree_getChildNumber(tree)) {
        checkBranchLengthsAreDefined(stTree_getChild(tree, childIdx));
        childIdx++;
    }
}
/*
 * Classify where the link's node sits in its tree: the root (no parent),
 * a leaf (has a parent, no children) or an internal node.  A node with
 * neither parent nor children is reported as the root.
 */
enum mafTreeLoc mafTreeNodeCompLink_getLoc(struct mafTreeNodeCompLink *ncLink) {
    if (stTree_getParent(ncLink->node) == NULL) {
        return mafTreeLocRoot;
    }
    if (stTree_getChildNumber(ncLink->node) == 0) {
        return mafTreeLocLeaf;
    }
    return mafTreeLocInternal;
}
/*
 * Check that 'genome' does not occur anywhere strictly below 'root'; a
 * repeat means the same genome appears at two levels of the tree.  Calls
 * errAbort() on the first descendant whose component belongs to 'genome'.
 */
static void assertNoLoops(struct Genome *genome, stTree *root) {
    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(root)) {
        stTree *descendant = stTree_getChild(root, childIdx);
        if (getNodeComp(descendant)->seq->genome == genome) {
            errAbort("genome occurs at two levels in the tree: %s", genome->name);
        }
        assertNoLoops(genome, descendant);
        childIdx++;
    }
}
/*
 * Structural equality of two trees: the roots must have equal branch
 * lengths, labels that compare equal under stString_eq(), the same number
 * of children, and pairwise-equal children in the same order.
 */
bool stTree_equals(stTree *tree1, stTree *tree2) {
    // Compare the two roots first; the label is only looked at when the
    // branch lengths already agree.
    if (!(stTree_getBranchLength(tree1) == stTree_getBranchLength(tree2))
            || !stString_eq(stTree_getLabel(tree1), stTree_getLabel(tree2))) {
        return false;
    }

    int childCount = stTree_getChildNumber(tree1);
    if (childCount != stTree_getChildNumber(tree2)) {
        return false;
    }

    // Advance while the corresponding subtrees match.
    int childIdx = 0;
    while (childIdx < childCount
            && stTree_equals(stTree_getChild(tree1, childIdx), stTree_getChild(tree2, childIdx))) {
        childIdx++;
    }
    return childIdx >= childCount;
}
/*
 * Post-order DFS that builds a mafTreeNodeCompLink for every node.
 *
 * Children are handled before their parent.  For each node a link is
 * constructed from the current value of *treeOrder, the node and
 * treeComps[*treeOrder]; *treeOrder is then advanced.  Aborts if the
 * component's orgSeqName differs from the node label.
 */
static void fillNodeCompLinksDFS(mafTree *mTree, stTree *node, int *treeOrder, struct malnComp *treeComps[]) {
    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(node)) {
        fillNodeCompLinksDFS(mTree, stTree_getChild(node, childIdx), treeOrder, treeComps);
        childIdx++;
    }

    int order = *treeOrder;
    struct mafTreeNodeCompLink *link = mafTreeNodeCompLink_construct(order, node, treeComps[order]);
    *treeOrder += 1;

    if (!sameString(link->comp->seq->orgSeqName, stTree_getLabel(node))) {
        errAbort("tree component name \"%s\" doesn't match tree node name \"%s\"", link->comp->seq->orgSeqName, stTree_getLabel(node));
    }
}
/*
 * Depth-first search for the species-tree node of 'genome' strictly below
 * 'speciesRoot' ('speciesRoot' itself is never tested).  Returns the first
 * match in child order, or NULL if there is none.
 */
static stTree *speciesTreeFindBelow(stTree *speciesRoot, struct Genome *genome) {
    for (int childIdx = 0; childIdx < stTree_getChildNumber(speciesRoot); childIdx++) {
        stTree *child = stTree_getChild(speciesRoot, childIdx);
        if (speciesTreeGetGenome(child) == genome) {
            return child;
        }
        stTree *deeperHit = speciesTreeFindBelow(child, genome);
        if (deeperHit != NULL) {
            return deeperHit;
        }
    }
    return NULL;
}
/*
 * Pre-order search of the subtree rooted at 'node' for the node whose link
 * refers to 'comp'.  Returns that node, or NULL when no node matches.
 */
static stTree *searchByComp(stTree *node, struct malnComp *comp) {
    if (getNodeCompLink(node)->comp == comp) {
        return node;
    }

    // Try each subtree in turn, stopping at the first hit.
    stTree *found = NULL;
    for (int childIdx = 0; (found == NULL) && (childIdx < stTree_getChildNumber(node)); childIdx++) {
        found = searchByComp(stTree_getChild(node, childIdx), comp);
    }
    return found;
}
/*
 * Write the subtree rooted at 'root' to 'fh', one node per line, indented
 * four spaces per level.  A node with a component is printed via
 * malnComp_prInfo(); otherwise its label is printed.
 */
static void dumpSubtree(stTree *root, FILE *fh, int indent) {
    // Leading padding: an empty string printed in a field of 4*indent columns.
    fprintf(fh, "%*s", 4*indent, "");

    struct malnComp *nodeComp = getNodeComp(root);
    if (nodeComp != NULL) {
        malnComp_prInfo(nodeComp, fh);
    } else {
        fprintf(fh, "%s", stTree_getLabel(root));
    }
    fputc('\n', fh);

    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(root)) {
        dumpSubtree(stTree_getChild(root, childIdx), fh, indent+1);
        childIdx++;
    }
}
/*
 * Post-order DFS that either assigns or verifies each node's treeOrder.
 *
 * For every node (children first) the component name is checked against
 * the node label.  Then, if 'check' is false, the link's treeOrder is set
 * to *treeOrder; if 'check' is true, a mismatch between the stored
 * treeOrder and *treeOrder aborts.  *treeOrder is incremented after each
 * node.
 */
static void setCheckTreeOrderDFS(mafTree *mTree, stTree *node, bool check, int *treeOrder) {
    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(node)) {
        setCheckTreeOrderDFS(mTree, stTree_getChild(node, childIdx), check, treeOrder);
        childIdx++;
    }

    struct mafTreeNodeCompLink *link = getNodeCompLink(node);
    if (!sameString(link->comp->seq->orgSeqName, stTree_getLabel(node))) {
        errAbort("tree component name \"%s\" doesn't match tree node name \"%s\"", link->comp->seq->orgSeqName, stTree_getLabel(node));
    }

    if (check) {
        if (link->treeOrder != *treeOrder) {
            errAbort("expected tree order (%d) doesn't match actual tree node order (%d) for \"%s\"", *treeOrder, link->treeOrder, stTree_getLabel(node));
        }
    } else {
        link->treeOrder = *treeOrder;
    }
    *treeOrder += 1;
}
/*
 * Remove the node referenced by 'ncLink' from the tree and free it.
 *
 * The node's children are re-attached to the node's former parent, the
 * node's link(s) and the node itself are destroyed, and the tree order is
 * reassigned.  The root cannot be removed; attempting to do so aborts.
 */
void mafTree_deleteNode(mafTree *mTree, struct mafTreeNodeCompLink *ncLink) {
    stTree *doomed = ncLink->node;
    stTree *formerParent = stTree_getParent(doomed);
    if (formerParent == NULL) {
        errAbort("BUG: can't remove tree root node");
    }

    // Detach the node, then hand its children over to the former parent.
    stTree_setParent(doomed, NULL);
    for (;;) {
        // setParent changes node children, so always take the first one
        // until none are left.
        if (stTree_getChildNumber(doomed) <= 0) {
            break;
        }
        stTree_setParent(stTree_getChild(doomed, 0), formerParent);
    }

    freeMafTreeNodeCompLinks(doomed);
    stTree_destruct(doomed);
    setCheckTreeOrder(mTree, false);
}
/*
 * Recursively verify that the block tree rooted at 'blkNode' is consistent
 * with the species tree.
 *
 * The genome of 'blkNode' must be found at or below 'speciesNode'; each
 * child's genome must in turn be found at or below that species node, and
 * the check then recurses into the child.  Failures are reported through
 * speciesTreeMismatchError().
 */
static void speciesTreeBlkTreeVerify(stTree *speciesNode, stTree *blkNode) {
    speciesNode = speciesTreeFindAtBelow(speciesNode, getNodeComp(blkNode)->seq->genome);
    if (speciesNode == NULL) {
        speciesTreeMismatchError(blkNode);
        return;
    }

    for (int childIdx = 0; childIdx < stTree_getChildNumber(blkNode); childIdx++) {
        stTree *blkChild = stTree_getChild(blkNode, childIdx);
        stTree *speciesChild = speciesTreeFindAtBelow(speciesNode, getNodeComp(blkChild)->seq->genome);
        if (speciesChild == NULL) {
            speciesTreeMismatchError(blkNode);
            continue;
        }
        speciesTreeBlkTreeVerify(speciesChild, blkChild);
    }
}
/*
 * Store 'child' in slot 'i' of the child list of 'tree'.
 *
 * 'i' must satisfy 0 <= i < stTree_getChildNumber(tree); this is enforced
 * only by assert(), so it is unchecked when assertions are compiled out.
 * NOTE(review): only tree->nodes is written here -- the parent pointer of
 * 'child' and the previous occupant of the slot are not visibly touched;
 * confirm that callers take care of them.
 */
void stTree_setChild(stTree *tree, int64_t i, stTree *child) { assert(i >= 0); assert(i < stTree_getChildNumber(tree)); stList_set(tree->nodes, i, child); }
/*
 * Destroy the mafTreeNodeCompLink of 'node' and then, recursively, those of
 * all its descendants.  The tree nodes themselves are not freed here.
 */
static void freeMafTreeNodeCompLinks(stTree *node) {
    struct mafTreeNodeCompLink *link = getNodeCompLink(node);
    mafTreeNodeCompLink_destruct(link);

    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(node)) {
        freeMafTreeNodeCompLinks(stTree_getChild(node, childIdx));
        childIdx++;
    }
}
/*
 * For every node of the species tree rooted at 'speciesNode', look up the
 * genome named by the node label in 'genomes' and store it as the node's
 * client data.
 */
static void speciesTreeAddLinks(stTree *speciesNode, struct Genomes *genomes) {
    stTree_setClientData(speciesNode, genomesGetGenome(genomes, stTree_getLabel(speciesNode)));

    int childIdx = 0;
    while (childIdx < stTree_getChildNumber(speciesNode)) {
        stTree *child = stTree_getChild(speciesNode, childIdx);
        speciesTreeAddLinks(child, genomes);
        childIdx++;
    }
}
/*
 * Clone every child subtree of 'srcParent' and append the clones, in order,
 * to 'destParent'.
 */
static void cloneChildren(stTree *srcParent, stTree *destParent, struct malnCompCompMap *srcDestCompMap) {
    for (int i = 0; i < stTree_getChildNumber(srcParent); i++) {
        // cloneTree() attaches the clone to the parent it is given, so no
        // second stTree_setParent() call is needed here.
        cloneTree(stTree_getChild(srcParent, i), destParent, srcDestCompMap);
    }
}