Esempio n. 1
0
/* Destroy a mafTree: free the mafTreeNodeCompLink objects of its tree, then
 * the tree, then the mafTree itself.  A NULL mTree is ignored. */
void mafTree_destruct(mafTree *mTree) {
    if (mTree == NULL) {
        return;
    }
    // links are freed first, while the tree nodes still exist
    freeMafTreeNodeCompLinks(mTree->tree);
    stTree_destruct(mTree->tree);
    freeMem(mTree);
}
Esempio n. 2
0
/* Check the sequence (block) tree of mTree against the species tree given
 * as the Newick string nhSpeciesTree. */
void mafTree_verifyWithSpeciesTree(mafTree *mTree, const char *nhSpeciesTree) {
    stTree *parsedSpecies = stTree_parseNewickString(nhSpeciesTree);

    // The two trees are searched recursively.  This lets the block tree be
    // shallower than the species tree because of deletions, and it is done
    // in a way that detects a block node containing the same genome as its
    // parent.
    speciesTreeAddLinks(parsedSpecies, mTree->genomes);
    speciesTreeBlkTreeVerify(parsedSpecies, mTree->tree);

    // the parsed species tree is only needed for the check
    stTree_destruct(parsedSpecies);
}
Esempio n. 3
0
/* Remove the node referenced by ncLink from the tree and free it.  The
 * node's children are re-attached to the node's former parent.  The root
 * node cannot be deleted; attempting to do so aborts. */
void mafTree_deleteNode(mafTree *mTree, struct mafTreeNodeCompLink *ncLink) {
    stTree *doomed = ncLink->node;
    stTree *adopter = stTree_getParent(doomed);
    if (adopter == NULL) {
        errAbort("BUG: can't remove tree root node");
    }

    // detach the node from its parent before moving its children
    stTree_setParent(doomed, NULL);

    // setParent changes the node's children, so keep taking child 0 until
    // no children remain
    while (stTree_getChildNumber(doomed) > 0) {
        stTree *firstChild = stTree_getChild(doomed, 0);
        stTree_setParent(firstChild, adopter);
    }

    freeMafTreeNodeCompLinks(doomed);
    stTree_destruct(doomed);
    setCheckTreeOrder(mTree, false);
}
Esempio n. 4
0
/*
 * Entry point: creates the first flower (name 0) in a cactus disk.
 *
 * Options: --logLevel (-a), --cactusDisk (-b, required), --speciesTree
 * (-f, required, newick string), --outgroupEvents (-g), --help (-h),
 * --makeEventHeadersAlphaNumeric (-i).  Remaining positional arguments are
 * logged as sequence files/directories and handed to
 * assignEventsAndSequences.
 *
 * Returns 0 on success (including when the first flower already exists) and
 * 1 on an unrecognised option; missing required options abort via
 * st_errAbort.
 */
int main(int argc, char *argv[]) {
    /*
     * Open the database.
     * Construct a flower.
     * Construct an event tree representing the species tree.
     * For each sequence construct two ends each containing a cap.
     * Make a file for the sequence.
     * Link the two caps.
     * Finish!
     */

    int64_t j;
    Group *group;
    Flower_EndIterator *endIterator;
    End *end;
    bool makeEventHeadersAlphaNumeric = 0;

    /*
     * Arguments/options
     */
    char * logLevelString = NULL;
    char * speciesTree = NULL;
    char * outgroupEvents = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = {
                { "logLevel", required_argument, 0, 'a' },
                { "cactusDisk", required_argument, 0, 'b' },
                { "speciesTree", required_argument, 0, 'f' },
                { "outgroupEvents", required_argument, 0, 'g' },
                { "help", no_argument, 0, 'h' },
                { "makeEventHeadersAlphaNumeric", no_argument, 0, 'i' },
                { 0, 0, 0, 0 } };

        int option_index = 0;

        // getopt_long returns int, so the key is held in an int
        int key = getopt_long(argc, argv, "a:b:f:hg:i", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = optarg;
                break;
            case 'b':
                cactusDiskDatabaseString = optarg;
                break;
            case 'f':
                speciesTree = optarg;
                break;
            case 'g':
                outgroupEvents = optarg;
                break;
            case 'h':
                usage();
                return 0;
            case 'i':
                makeEventHeadersAlphaNumeric = 1;
                break;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    // Explicit checks rather than assert(): these must still fire when the
    // program is built with NDEBUG, because both strings are used
    // unconditionally below.
    if (cactusDiskDatabaseString == NULL) {
        st_errAbort("The cactus disk database string (--cactusDisk) was not specified");
    }
    if (speciesTree == NULL) {
        st_errAbort("The species tree (--speciesTree) was not specified");
    }

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString);

    for (j = optind; j < argc; j++) {
        st_logInfo("Sequence file/directory %s\n", argv[j]);
    }

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    if (stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeTokyoCabinet || stKVDatabaseConf_getType(kvDatabaseConf)
            == stKVDatabaseTypeKyotoTycoon) {
        assert(stKVDatabaseConf_getDir(kvDatabaseConf) != NULL);
        cactusDisk = cactusDisk_construct2(kvDatabaseConf, "cactusSequences");
    } else {
        cactusDisk = cactusDisk_construct(kvDatabaseConf, 1);
    }
    st_logInfo("Set up the flower disk\n");

    //////////////////////////////////////////////
    //Construct the flower
    //////////////////////////////////////////////

    // Nothing to do if flower 0 is already present in the disk.
    if (cactusDisk_getFlower(cactusDisk, 0) != NULL) {
        cactusDisk_destruct(cactusDisk);
        st_logInfo("The first flower already exists\n");
        return 0;
    }
    flower = flower_construct2(0, cactusDisk);
    assert(flower_getName(flower) == 0);
    st_logInfo("Constructed the flower\n");

    //////////////////////////////////////////////
    //Construct the event tree
    //////////////////////////////////////////////

    st_logInfo("Going to build the event tree with newick string: %s\n", speciesTree);
    stTree *tree = stTree_parseNewickString(speciesTree);
    st_logInfo("Parsed the tree\n");
    if (makeEventHeadersAlphaNumeric) {
        makeEventHeadersAlphaNumericFn(tree);
    }
    stTree_setBranchLength(tree, INT64_MAX);
    checkBranchLengthsAreDefined(tree);
    eventTree = eventTree_construct2(flower); //creates the event tree and the root event
    totalEventNumber = 1;
    st_logInfo("Constructed the basic event tree\n");

    // Construct a set of outgroup names so that ancestral outgroups
    // get recognized.  The set owns copies of the names (freed with free).
    stSet *outgroupNameSet = stSet_construct3(stHash_stringKey,
                                              stHash_stringEqualKey,
                                              free);
    if(outgroupEvents != NULL) {
        stList *outgroupNames = stString_split(outgroupEvents);
        for(int64_t i = 0; i < stList_length(outgroupNames); i++) {
            char *outgroupName = stList_get(outgroupNames, i);
            stSet_insert(outgroupNameSet, stString_copy(outgroupName));
        }
        stList_destruct(outgroupNames);
    }

    //now traverse the tree; j indexes the first positional argument
    j = optind;
    assignEventsAndSequences(eventTree_getRootEvent(eventTree), tree,
                             outgroupNameSet, argv, &j);

    char *eventTreeString = eventTree_makeNewickString(eventTree);
    st_logInfo(
            "Constructed the initial flower with %" PRIi64 " sequences and %" PRIi64 " events with string: %s\n",
            totalSequenceNumber, totalEventNumber, eventTreeString);
    assert(event_getSubTreeBranchLength(eventTree_getRootEvent(eventTree)) >= 0.0);
    free(eventTreeString);

    //////////////////////////////////////////////
    //Label any outgroup events.
    //////////////////////////////////////////////

    if (outgroupEvents != NULL) {
        stList *outgroupEventsList = stString_split(outgroupEvents);
        for (int64_t i = 0; i < stList_length(outgroupEventsList); i++) {
            // Look the event up under the same header form the tree was built with.
            char *outgroupEvent = makeEventHeadersAlphaNumeric ? makeAlphaNumeric(stList_get(outgroupEventsList, i)) : stString_copy(stList_get(outgroupEventsList, i));
            Event *event = eventTree_getEventByHeader(eventTree, outgroupEvent);
            if (event == NULL) {
                st_errAbort("Got an outgroup string that does not match an event, outgroup string %s", outgroupEvent);
            }
            assert(!event_isOutgroup(event));
            event_setOutgroupStatus(event, 1);
            assert(event_isOutgroup(event));
            free(outgroupEvent);
        }
        stList_destruct(outgroupEventsList);
    }

    //////////////////////////////////////////////
    //Construct the terminal group.
    //////////////////////////////////////////////

    if (flower_getEndNumber(flower) > 0) {
        // Put every end of the flower into a single group.
        group = group_construct2(flower);
        endIterator = flower_getEndIterator(flower);
        while ((end = flower_getNextEnd(endIterator)) != NULL) {
            end_setGroup(end, group);
        }
        flower_destructEndIterator(endIterator);
        assert(group_isLeaf(group));

        // Create a one link chain if there is only one pair of attached ends..
        group_constructChainForLink(group);
        assert(!flower_builtBlocks(flower));
    } else {
        flower_setBuiltBlocks(flower, 1);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Write the flower to disk.
    ///////////////////////////////////////////////////////////////////////////

    //flower_check(flower);
    cactusDisk_write(cactusDisk);
    st_logInfo("Updated the flower on disk\n");

    ///////////////////////////////////////////////////////////////////////////
    // Cleanup.
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_destruct(cactusDisk);

    return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.

    // Deliberately unreachable: remove the return above to run the full
    // cleanup when checking for memory leaks.
    stSet_destruct(outgroupNameSet);
    stTree_destruct(tree);
    stKVDatabaseConf_destruct(kvDatabaseConf);

    return 0;
}
Esempio n. 5
0
/* Return a newly allocated copy of the first whitespace-delimited word of
 * fastaHeader, which is taken to be the sequence name.  The result is the
 * empty string when the header contains no word.  The caller frees it. */
static char *mfaHeaderFirstWord(const char *fastaHeader) {
    char *word = st_malloc(sizeof(char) * (1 + strlen(fastaHeader)));
    word[0] = '\0'; // sscanf stores nothing if the header holds no word
    sscanf(fastaHeader, "%s", word);
    return word;
}

/*
 * Entry point: converts a multi-FASTA alignment into a single-block MAF file.
 *
 * Options: --logLevel (-a), --mfaFile (-b, required), --outputFile
 * (-d, required), --treeFile (-t), --help (-h).  When a tree file is given
 * its newick string is written on the "a" line and the rows are written in
 * the order of the tree's leaf array, matching each leaf label against the
 * first word of a FASTA header; otherwise rows follow the FASTA order.
 *
 * Returns 0 on success and 1 on an unrecognised option; missing inputs,
 * unopenable files and unmatched leaves exit with status 1.
 */
int main(int argc, char *argv[]) {
    /*
     * Arguments/options
     */
    char *logLevelString = NULL;
    char *mfaFile = NULL;
    char *outputFile = NULL;
    char *treeFile = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        // treeFile is required_argument so that "--treeFile FILE" is parsed
        // the same way as "-t FILE"; with optional_argument only the
        // "--treeFile=FILE" form sets optarg.
        static struct option long_options[] = {
            { "logLevel", required_argument, 0, 'a' },
            { "mfaFile", required_argument, 0, 'b' },
            { "outputFile", required_argument, 0, 'd' },
            { "treeFile", required_argument, 0, 't' },
            { "help", no_argument, 0, 'h' },
            { 0, 0, 0, 0 }
        };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:d:t:h", long_options, &option_index);

        if (key == -1) {
            break;
        }

        // Each option keeps its own copy; an earlier copy is released if the
        // option is repeated.
        switch (key) {
            case 'a':
                free(logLevelString);
                logLevelString = stString_copy(optarg);
                break;
            case 'b':
                free(mfaFile);
                mfaFile = stString_copy(optarg);
                break;
            case 'd':
                free(outputFile);
                outputFile = stString_copy(optarg);
                break;
            case 't':
                free(treeFile);
                treeFile = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    // Checked explicitly rather than with assert() so the validation is
    // still present in NDEBUG builds.
    if (argc == 1 || mfaFile == NULL || outputFile == NULL) {
        usage();
        exit(1);
    }

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("MFA file  name : %s\n", mfaFile);
    st_logInfo("Output MAF file : %s\n", outputFile);
    st_logInfo("Tree file name: %s\n", treeFile == NULL ? "null" : treeFile);

    //////////////////////////////////////////////
    //Get the MFA alignment
    //////////////////////////////////////////////

    //get the alignment
    struct List *sequences = constructEmptyList(0, free);
    struct List *seqLengths = constructEmptyList(0, (void (*)(void *))destructInt);
    struct List *fastaNames = constructEmptyList(0, free);
    FILE *fileHandle = fopen(mfaFile, "r");
    if (fileHandle == NULL) {
        fprintf(stderr, "Could not open MFA file %s for reading\n", mfaFile);
        usage();
        exit(1);
    }
    fastaRead(fileHandle, sequences, seqLengths, fastaNames);
    fclose(fileHandle);

    //////////////////////////////////////////////
    //Get the tree alignment
    //////////////////////////////////////////////

    stTree *tree = NULL;
    LeafPtrArray *leafArray = NULL;

    int32_t leafCount = 0;
    if (treeFile != NULL) {
        tree = eTreeX_getTreeFromFile(treeFile);

        // first pass counts the leaves, second pass collects them
        eTreeX_postOrderTraversal(tree, eTreeX_countLeaves, &leafCount);

        leafArray = eTreeX_constructLeafPtrArray(leafCount);
        eTreeX_postOrderTraversal(tree, eTreeX_getLeafArray, (void *) leafArray);
    }

    //////////////////////////////////////////////
    //Write the MAF alignment.
    //////////////////////////////////////////////

    fileHandle = fopen(outputFile, "w");
    if (fileHandle == NULL) {
        fprintf(stderr, "Could not open output file %s for writing\n", outputFile);
        exit(1);
    }
    //write the header.
    fprintf(fileHandle, "##maf version=1 scoring=NULL\n");
    fprintf(fileHandle, "# converted_from_MFA\n\n");

    //write the score line
    char *treeString = NULL;
    if (treeFile != NULL) {
        treeString = stTree_getNewickTreeString(tree);
        fprintf(fileHandle, "a score=0 tree=\"%s\"\n", treeString);
    }
    else {
        fprintf(fileHandle, "a score=0\n");
        // without a tree every sequence is written, in file order
        leafCount = sequences->length;
    }

    //write the alignment
    for (int32_t rowIdx = 0; rowIdx < leafCount; rowIdx++) {
        int32_t seqIdx = rowIdx;
        if (treeFile != NULL) {
            const char *label = stTree_getLabel((stTree *) leafArray->ptrArray[rowIdx]);
            if (label == NULL) {
                fprintf(stderr, "A leaf of the tree in %s has no label\n", treeFile);
                exit(1);
            }

            /* Do a brute force search to find the appropriate sequence that matches "label" */
            for (seqIdx = 0; seqIdx < sequences->length; seqIdx++) {
                char *candidate = mfaHeaderFirstWord(fastaNames->list[seqIdx]);
                int isMatch = strcmp(label, candidate) == 0;
                free(candidate);
                if (isMatch) {
                    break;
                }
            }
            // Falling off the end would index past the sequence lists below.
            if (seqIdx >= sequences->length) {
                fprintf(stderr, "No sequence in %s matches the tree leaf %s\n", mfaFile, label);
                exit(1);
            }
        }

        char *sequence = sequences->list[seqIdx];
        int32_t alignedLength = (int32_t)strlen(sequence);
        int32_t seqLength = *((int32_t *)seqLengths->list[seqIdx]);
        assert(seqLength == alignedLength);
        (void)seqLength; // only read by the assert above
        char *sequenceName = mfaHeaderFirstWord(fastaNames->list[seqIdx]);

        // the MAF size fields count residues only, so skip gap characters
        int32_t length = 0;
        for (int32_t col = 0; col < alignedLength; col++) {
            if (sequence[col] != '-') {
                length++;
            }
        }
        fprintf(fileHandle, "s\t%s\t%i\t%i\t%s\t%i\t%s\n", sequenceName, 0, length, "+", length, sequence);
        free(sequenceName);
    }

    // fclose flushes the buffered output, so a failure here means lost data
    if (fclose(fileHandle) != 0) {
        fprintf(stderr, "Failed to finish writing output file %s\n", outputFile);
        exit(1);
    }

    //////////////////////////////////////////////
    //Clean up.
    //////////////////////////////////////////////

    // The tree objects are released before treeFile is freed, because
    // treeFile is what records whether they were created.
    if (treeFile != NULL) {
        stTree_destruct(tree);
        free(treeString);
        eTreeX_destructLeafPtrArray(leafArray);
    }

    free(logLevelString);
    free(mfaFile);
    free(outputFile);
    free(treeFile);

    destructList(sequences);
    destructList(seqLengths);
    destructList(fastaNames);

    return 0;
}