/*
 * Destroy a mafTree: free the mafTreeNodeCompLink objects hanging off its
 * stTree, destroy the stTree itself, then free the mafTree structure.
 * Passing NULL is allowed and does nothing.
 */
void mafTree_destruct(mafTree *mTree) {
    if (mTree == NULL) {
        return;
    }
    /* the links must go first: they are reached through the tree nodes */
    freeMafTreeNodeCompLinks(mTree->tree);
    stTree_destruct(mTree->tree);
    freeMem(mTree);
}
/*
 * Check the sequence (block) tree against a species tree.
 *
 * nhSpeciesTree is a Newick string; it is parsed into a temporary stTree,
 * linked against mTree->genomes, compared with mTree->tree, and destroyed
 * again before returning.
 *
 * The two trees are searched recursively. This allows the block tree to be
 * shallower than the species tree because of deletions, and it is done in a
 * way that detects a block node containing the same genome as its parent.
 */
void mafTree_verifyWithSpeciesTree(mafTree *mTree, const char *nhSpeciesTree) {
    stTree *parsedSpecies = stTree_parseNewickString(nhSpeciesTree);

    speciesTreeAddLinks(parsedSpecies, mTree->genomes);
    speciesTreeBlkTreeVerify(parsedSpecies, mTree->tree);

    stTree_destruct(parsedSpecies);
}
/*
 * Remove the node referenced by ncLink from the tree and free it.
 * The node's children are re-attached to the node's former parent.
 * The root node can't be deleted; attempting to do so calls errAbort().
 */
void mafTree_deleteNode(mafTree *mTree, struct mafTreeNodeCompLink *ncLink) {
    stTree *doomed = ncLink->node;
    stTree *newParent = stTree_getParent(doomed);
    if (newParent == NULL) {
        errAbort("BUG: can't remove tree root node");
    }

    /* detach the node before handing its children over */
    stTree_setParent(doomed, NULL);

    /* setParent changes the node's children, so keep taking child 0 until
     * none are left rather than walking an index */
    for (;;) {
        if (stTree_getChildNumber(doomed) <= 0) {
            break;
        }
        stTree *firstChild = stTree_getChild(doomed, 0);
        stTree_setParent(firstChild, newParent);
    }

    freeMafTreeNodeCompLinks(doomed);
    stTree_destruct(doomed);

    /* NOTE(review): presumably marks the tree order as unverified after the
     * structural change -- confirm against setCheckTreeOrder */
    setCheckTreeOrder(mTree, false);
}
int main(int argc, char *argv[]) { /* * Open the database. * Construct a flower. * Construct an event tree representing the species tree. * For each sequence contruct two ends each containing an cap. * Make a file for the sequence. * Link the two caps. * Finish! */ int64_t key, j; Group *group; Flower_EndIterator *endIterator; End *end; bool makeEventHeadersAlphaNumeric = 0; /* * Arguments/options */ char * logLevelString = NULL; char * speciesTree = NULL; char * outgroupEvents = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, { "speciesTree", required_argument, 0, 'f' }, { "outgroupEvents", required_argument, 0, 'g' }, { "help", no_argument, 0, 'h' }, { "makeEventHeadersAlphaNumeric", no_argument, 0, 'i' }, { 0, 0, 0, 0 } }; int option_index = 0; key = getopt_long(argc, argv, "a:b:f:hg:i", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = optarg; break; case 'b': cactusDiskDatabaseString = optarg; break; case 'f': speciesTree = optarg; break; case 'g': outgroupEvents = optarg; break; case 'h': usage(); return 0; case 'i': makeEventHeadersAlphaNumeric = 1; break; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. 
/////////////////////////////////////////////////////////////////////////// //assert(logLevelString == NULL || strcmp(logLevelString, "CRITICAL") == 0 || strcmp(logLevelString, "INFO") == 0 || strcmp(logLevelString, "DEBUG") == 0); assert(cactusDiskDatabaseString != NULL); assert(speciesTree != NULL); ////////////////////////////////////////////// //Set up logging ////////////////////////////////////////////// st_setLogLevelFromString(logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString); for (j = optind; j < argc; j++) { st_logInfo("Sequence file/directory %s\n", argv[j]); } ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); if (stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeTokyoCabinet || stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeKyotoTycoon) { assert(stKVDatabaseConf_getDir(kvDatabaseConf) != NULL); cactusDisk = cactusDisk_construct2(kvDatabaseConf, "cactusSequences"); } else { cactusDisk = cactusDisk_construct(kvDatabaseConf, 1); } st_logInfo("Set up the flower disk\n"); ////////////////////////////////////////////// //Construct the flower ////////////////////////////////////////////// if (cactusDisk_getFlower(cactusDisk, 0) != NULL) { cactusDisk_destruct(cactusDisk); st_logInfo("The first flower already exists\n"); return 0; } flower = flower_construct2(0, cactusDisk); assert(flower_getName(flower) == 0); st_logInfo("Constructed the flower\n"); ////////////////////////////////////////////// //Construct the event tree ////////////////////////////////////////////// st_logInfo("Going to build the event tree with newick string: %s\n", speciesTree); stTree *tree = stTree_parseNewickString(speciesTree); 
st_logInfo("Parsed the tree\n"); if (makeEventHeadersAlphaNumeric) { makeEventHeadersAlphaNumericFn(tree); } stTree_setBranchLength(tree, INT64_MAX); checkBranchLengthsAreDefined(tree); eventTree = eventTree_construct2(flower); //creates the event tree and the root even totalEventNumber = 1; st_logInfo("Constructed the basic event tree\n"); // Construct a set of outgroup names so that ancestral outgroups // get recognized. stSet *outgroupNameSet = stSet_construct3(stHash_stringKey, stHash_stringEqualKey, free); if(outgroupEvents != NULL) { stList *outgroupNames = stString_split(outgroupEvents); for(int64_t i = 0; i < stList_length(outgroupNames); i++) { char *outgroupName = stList_get(outgroupNames, i); stSet_insert(outgroupNameSet, stString_copy(outgroupName)); } stList_destruct(outgroupNames); } //now traverse the tree j = optind; assignEventsAndSequences(eventTree_getRootEvent(eventTree), tree, outgroupNameSet, argv, &j); char *eventTreeString = eventTree_makeNewickString(eventTree); st_logInfo( "Constructed the initial flower with %" PRIi64 " sequences and %" PRIi64 " events with string: %s\n", totalSequenceNumber, totalEventNumber, eventTreeString); assert(event_getSubTreeBranchLength(eventTree_getRootEvent(eventTree)) >= 0.0); free(eventTreeString); //assert(0); ////////////////////////////////////////////// //Label any outgroup events. ////////////////////////////////////////////// if (outgroupEvents != NULL) { stList *outgroupEventsList = stString_split(outgroupEvents); for (int64_t i = 0; i < stList_length(outgroupEventsList); i++) { char *outgroupEvent = makeEventHeadersAlphaNumeric ? 
makeAlphaNumeric(stList_get(outgroupEventsList, i)) : stString_copy(stList_get(outgroupEventsList, i)); Event *event = eventTree_getEventByHeader(eventTree, outgroupEvent); if (event == NULL) { st_errAbort("Got an outgroup string that does not match an event, outgroup string %s", outgroupEvent); } assert(!event_isOutgroup(event)); event_setOutgroupStatus(event, 1); assert(event_isOutgroup(event)); free(outgroupEvent); } stList_destruct(outgroupEventsList); } ////////////////////////////////////////////// //Construct the terminal group. ////////////////////////////////////////////// if (flower_getEndNumber(flower) > 0) { group = group_construct2(flower); endIterator = flower_getEndIterator(flower); while ((end = flower_getNextEnd(endIterator)) != NULL) { end_setGroup(end, group); } flower_destructEndIterator(endIterator); assert(group_isLeaf(group)); // Create a one link chain if there is only one pair of attached ends.. group_constructChainForLink(group); assert(!flower_builtBlocks(flower)); } else { flower_setBuiltBlocks(flower, 1); } /////////////////////////////////////////////////////////////////////////// // Write the flower to disk. /////////////////////////////////////////////////////////////////////////// //flower_check(flower); cactusDisk_write(cactusDisk); st_logInfo("Updated the flower on disk\n"); /////////////////////////////////////////////////////////////////////////// // Cleanup. /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection. stSet_destruct(outgroupNameSet); stTree_destruct(tree); stKVDatabaseConf_destruct(kvDatabaseConf); return 0; }
int main(int argc, char *argv[]) { /* * Arguments/options */ char *logLevelString = NULL; char *mfaFile = NULL; char *outputFile = NULL; char *treeFile = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while (1) { static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "mfaFile", required_argument, 0, 'b' }, { "outputFile", required_argument, 0, 'd' }, { "treeFile", optional_argument, 0, 't' }, { "help", no_argument, 0, 'h' }, { 0, 0, 0, 0 } }; int option_index = 0; int key = getopt_long(argc, argv, "a:b:d:t:h", long_options, &option_index); if (key == -1) { break; } switch (key) { case 'a': logLevelString = stString_copy(optarg); break; case 'b': mfaFile = stString_copy(optarg); break; case 'd': outputFile = stString_copy(optarg); break; case 't': treeFile = stString_copy(optarg); break; case 'h': usage(); return 0; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. /////////////////////////////////////////////////////////////////////////// if (argc == 1) { usage(); exit(1); } assert(mfaFile != NULL); assert(outputFile != NULL); ////////////////////////////////////////////// //Set up logging ////////////////////////////////////////////// st_setLogLevelFromString(logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("MFA file name : %s\n", mfaFile); st_logInfo("Output MAF file : %s\n", outputFile); st_logInfo("Tree file name: %s\n", treeFile == NULL ? 
"null" : treeFile); ////////////////////////////////////////////// //Get the MFA alignment ////////////////////////////////////////////// //get the alignment struct List *sequences = constructEmptyList(0, free); struct List *seqLengths = constructEmptyList(0, (void (*)(void *))destructInt); struct List *fastaNames = constructEmptyList(0, free); FILE *fileHandle = fopen(mfaFile, "r"); if (fileHandle == NULL) { usage(); exit(1); } fastaRead(fileHandle, sequences, seqLengths, fastaNames); fclose(fileHandle); ////////////////////////////////////////////// //Get the tree alignment ////////////////////////////////////////////// stTree *tree = NULL; LeafPtrArray *leafArray = NULL; int32_t leafCount = 0; if (treeFile != NULL) { tree = eTreeX_getTreeFromFile(treeFile); eTreeX_postOrderTraversal(tree, eTreeX_countLeaves, &leafCount); leafArray = eTreeX_constructLeafPtrArray(leafCount); eTreeX_postOrderTraversal(tree, eTreeX_getLeafArray, (void *) leafArray); } ////////////////////////////////////////////// //Write the MFA alignment. ////////////////////////////////////////////// fileHandle = fopen(outputFile, "w"); //write the header. 
fprintf(fileHandle, "##maf version=1 scoring=NULL\n"); fprintf(fileHandle, "# converted_from_MFA\n\n"); //write the score line char *treeString = NULL; if (treeFile != NULL) { treeString = stTree_getNewickTreeString(tree); fprintf(fileHandle, "a score=0 tree=\"%s\"\n", treeString); } else { fprintf(fileHandle, "a score=0\n"); leafCount = sequences->length; } //write the alignment int32_t i, j; int32_t ii; const char *label; for (ii=0; ii<leafCount; ii++) { if (treeFile != NULL) { label = stTree_getLabel((stTree *) leafArray->ptrArray[ii]); /* Do a brute force search to find the appropriate sequence that matches "label" */ for (i=0; i<sequences->length; i++) { char *fastaHeader = fastaNames->list[i]; char *sequenceName = st_malloc(sizeof(char) *(1 + strlen(fastaHeader))); sscanf(fastaHeader, "%s", sequenceName); //take the sequence name to be the first word of the sequence. if (strcmp(label, sequenceName) == 0) { free(sequenceName); break; } free(sequenceName); } } else { i = ii; } char *sequence = sequences->list[i]; int32_t seqLength = *((int32_t *)seqLengths->list[i]); assert(seqLength == (int32_t)strlen(sequence)); char *fastaHeader = fastaNames->list[i]; char *sequenceName = st_malloc(sizeof(char) *(1 + strlen(fastaHeader))); sscanf(fastaHeader, "%s", sequenceName); //take the sequence name to be the first word of the sequence. int32_t length = 0; for (j=0; j<(int32_t)strlen(sequence); j++) { if (sequence[j] != '-') { length++; } } fprintf(fileHandle, "s\t%s\t%i\t%i\t%s\t%i\t%s\n", sequenceName, 0, length, "+", length, sequence); free(sequenceName); } fclose(fileHandle); ////////////////////////////////////////////// //Clean up. ////////////////////////////////////////////// free(mfaFile); free(outputFile); free(treeFile); if (treeFile != NULL) { stTree_destruct(tree); free(treeString); eTreeX_destructLeafPtrArray(leafArray); } destructList(sequences); destructList(seqLengths); destructList(fastaNames); return 0; }