Exemple #1
0
static char *tree_getNewickTreeStringP(stTree *tree) {
    char *cA, *cA2;
    if(stTree_getChildNumber(tree) > 0) {
        int32_t i;
        cA = stString_copy("(");
        for(i=0; i<stTree_getChildNumber(tree); i++) {
            cA2 = tree_getNewickTreeStringP(stTree_getChild(tree, i));
            char *cA3 = stString_print((i+1 < stTree_getChildNumber(tree) ? "%s%s," : "%s%s"), cA, cA2);
            free(cA);
            free(cA2);
            cA = cA3;
        }
        cA2 = stString_print("%s)", cA);
        free(cA);
        cA = cA2;
    }
    else {
        cA = stString_copy("");
    }
    if(stTree_getLabel(tree) != NULL) {
        cA2 = stString_print("%s%s", cA, stTree_getLabel(tree));
        free(cA);
        cA = cA2;
    }
    if(stTree_getBranchLength(tree) != INFINITY) {
        char *cA2 = stString_print("%s:%g", cA, stTree_getBranchLength(tree));
        free(cA);
        cA = cA2;
    }
    return cA;
}
Exemple #2
0
static void assignEventsAndSequences(Event *parentEvent, stTree *tree,
                                     stSet *outgroupNameSet,
                                     char *argv[], int64_t *j) {
    Event *myEvent = NULL; // To distinguish from the global "event" variable.
    assert(tree != NULL);
    totalEventNumber++;
    if (stTree_getChildNumber(tree) > 0) {
        myEvent = event_construct3(stTree_getLabel(tree),
                                   stTree_getBranchLength(tree), parentEvent,
                                   eventTree);
        for (int64_t i = 0; i < stTree_getChildNumber(tree); i++) {
            assignEventsAndSequences(myEvent, stTree_getChild(tree, i),
                                     outgroupNameSet, argv, j);
        }
    }
    if (stTree_getChildNumber(tree) == 0 || (stTree_getLabel(tree) != NULL && (stSet_search(outgroupNameSet, (char *)stTree_getLabel(tree)) != NULL))) {
        // This event is a leaf and/or an outgroup, so it has
        // associated sequence.
        assert(stTree_getLabel(tree) != NULL);

        assert(stTree_getBranchLength(tree) != INFINITY);
        if (stTree_getChildNumber(tree) == 0) {
            // Construct the leaf event
            myEvent = event_construct3(stTree_getLabel(tree), stTree_getBranchLength(tree), parentEvent, eventTree);
        }

        char *fileName = argv[*j];

        if (!stFile_exists(fileName)) {
            st_errAbort("File does not exist: %s\n", fileName);
        }

        // Set the global "event" variable, which is needed for the
        // function provided to fastaReadToFunction.
        event = myEvent;
        if (stFile_isDir(fileName)) {
            st_logInfo("Processing directory: %s\n", fileName);
            stList *filesInDir = stFile_getFileNamesInDirectory(fileName);
            for (int64_t i = 0; i < stList_length(filesInDir); i++) {
                char *absChildFileName = stFile_pathJoin(fileName, stList_get(filesInDir, i));
                assert(stFile_exists(absChildFileName));
                setCompleteStatus(absChildFileName); //decide if the sequences in the file should be free or attached.
                FILE *fileHandle = fopen(absChildFileName, "r");
                fastaReadToFunction(fileHandle, processSequence);
                fclose(fileHandle);
                free(absChildFileName);
            }
            stList_destruct(filesInDir);
        } else {
            st_logInfo("Processing file: %s\n", fileName);
            setCompleteStatus(fileName); //decide if the sequences in the file should be free or attached.
            FILE *fileHandle = fopen(fileName, "r");
            fastaReadToFunction(fileHandle, processSequence);
            fclose(fileHandle);
        }
        (*j)++;
    }
}
Exemple #3
0
/* enforce order of children */
static int sortChildrenCmpFn(stTree *a, stTree *b) {
    int diff = strcmp(stTree_getLabel(a), stTree_getLabel(b));
    if (diff == 0) {
        // same names, sort by seq and location, if available
        struct mafTreeNodeCompLink *ncLinkA = getNodeCompLink(a), *ncLinkB = getNodeCompLink(b);
        if ((ncLinkA != NULL) && (ncLinkB != NULL)) {
            diff = malnComp_cmp(ncLinkA->comp, ncLinkB->comp);
        }
    }
    return diff;
}
Exemple #4
0
/* assert sanity of nodeCompLink */
void mafTreeNodeCompLink_assert(struct mafTreeNodeCompLink *ncLink) {
#ifndef NDEBUG
    if (ncLink != NULL) {
        assert(stString_eq(stTree_getLabel(ncLink->node), ncLink->comp->seq->orgSeqName));
    }
#endif
}
Exemple #5
0
/* clone a node */
stTree *stTree_cloneNode(stTree *node) {
    stTree *node2 = stTree_construct();
    stTree_setBranchLength(node2, stTree_getBranchLength(node));
    stTree_setClientData(node2, stTree_getClientData(node));
    stTree_setLabel(node2, stTree_getLabel(node));
    return node2;
}
Exemple #6
0
void makeEventHeadersAlphaNumericFn(stTree *tree) {
    char *cA = makeAlphaNumeric(stTree_getLabel(tree));
    stTree_setLabel(tree, cA);
    free(cA);
    for (int64_t i = 0; i < stTree_getChildNumber(tree); i++) {
        makeEventHeadersAlphaNumericFn(stTree_getChild(tree, i));
    }
}
Exemple #7
0
bool stTree_equals(stTree *tree1, stTree *tree2) {
    if (stTree_getBranchLength(tree1) != stTree_getBranchLength(tree2)) {
        return false;
    }
    if (!stString_eq(stTree_getLabel(tree1), stTree_getLabel(tree2))) {
        return false;
    }
    int numChildren = stTree_getChildNumber(tree1);
    if (stTree_getChildNumber(tree2) != numChildren) {
        return false;
    }
    for (int i = 0; i < numChildren; i++) {
        if (!stTree_equals(stTree_getChild(tree1, i), stTree_getChild(tree2, i))) {
            return false;
        }
    }
    return true;
}
Exemple #8
0
/* DFS to fill in table of node links and link back with clientData */
static void fillNodeCompLinksDFS(mafTree *mTree, stTree *node, int *treeOrder, struct malnComp *treeComps[]) {
    for (int i = 0; i < stTree_getChildNumber(node); i++) {
        fillNodeCompLinksDFS(mTree, stTree_getChild(node, i), treeOrder, treeComps);
    }
    struct mafTreeNodeCompLink *ncLink = mafTreeNodeCompLink_construct(*treeOrder, node, treeComps[*treeOrder]);
    (*treeOrder)++;
    if (!sameString(ncLink->comp->seq->orgSeqName, stTree_getLabel(node))) {
        errAbort("tree component name \"%s\" doesn't match tree node name \"%s\"", ncLink->comp->seq->orgSeqName, stTree_getLabel(node));
    }
}
Exemple #9
0
/* recursive dump */
static void dumpSubtree(stTree *root, FILE *fh, int indent) {
    fprintf(fh, "%*s", 4*indent, "");
    struct malnComp *comp = getNodeComp(root);
    if (comp == NULL) {
        fprintf(fh, "%s", stTree_getLabel(root));
    } else {
        malnComp_prInfo(comp, fh);
    }
    fputc('\n', fh);
    for (int i = 0; i < stTree_getChildNumber(root); i++) {
        dumpSubtree(stTree_getChild(root, i), fh, indent+1);
    }
}
Exemple #10
0
/* DFS to set or check tree order after a join. */
static void setCheckTreeOrderDFS(mafTree *mTree, stTree *node, bool check, int *treeOrder) {
    for (int i = 0; i < stTree_getChildNumber(node); i++) {
        setCheckTreeOrderDFS(mTree, stTree_getChild(node, i), check, treeOrder);
    }
    struct mafTreeNodeCompLink *ncLink = getNodeCompLink(node);
    if (!sameString(ncLink->comp->seq->orgSeqName, stTree_getLabel(node))) {
        errAbort("tree component name \"%s\" doesn't match tree node name \"%s\"", ncLink->comp->seq->orgSeqName, stTree_getLabel(node));
    }
    if (!check) {
        ncLink->treeOrder = *treeOrder;
    } else if (ncLink->treeOrder != *treeOrder) {
        errAbort("expected tree order (%d) doesn't match actual tree node order (%d) for \"%s\"", *treeOrder, ncLink->treeOrder, stTree_getLabel(node));
    }
    (*treeOrder)++;
}
Exemple #11
0
/* Join at the specified components, returning new root */
static stTree *joinAtNodes(stTree *root1, stTree *node1, stTree *root2, stTree *node2, struct malnCompCompMap *srcDestCompMap) {
    if ((stTree_getParent(node1) == NULL) && (stTree_getParent(node2) == NULL)) {
        assert((root1 == node1) && (root2 == node2));
        return joinTrees(root1, node1, node2, srcDestCompMap);
    } else if (stTree_getParent(node1) == NULL) {
        assert(root1 == node1);
        return joinTrees(root2, node2, node1, srcDestCompMap);
    } else if (stTree_getParent(node2) == NULL) {
        assert(root2 == node2);
        return joinTrees(root1, node1, node2, srcDestCompMap);
    } else {
        errAbort("join nodes don't obey rules: node1: %s node2: %s", stTree_getLabel(node1), stTree_getLabel(node2));
        return NULL;
    }
}
Exemple #12
0
/* add genome objects as client data */
static void speciesTreeAddLinks(stTree *speciesNode, struct Genomes *genomes) {
    stTree_setClientData(speciesNode, genomesGetGenome(genomes, stTree_getLabel(speciesNode)));
    for (int i = 0; i < stTree_getChildNumber(speciesNode); i++) {
        speciesTreeAddLinks(stTree_getChild(speciesNode, i), genomes);
    }
}
Exemple #13
0
/* clone the root. */
static stTree *subrangeCloneRoot(stTree *srcRoot, struct malnCompCompMap *srcDestCompMap) {
    // clone root, if deleted, these must only be one child (due to the way
    // the trees are constructed).
    stList *pendingSubtrees = stList_construct();
    stTree *destRoot = subrangeCloneNode(srcRoot, srcDestCompMap, pendingSubtrees);
    if (destRoot == NULL) {
        if (stList_length(pendingSubtrees) > 1) {
            struct mafTreeNodeCompLink *srcNcLink = getNodeCompLink(srcRoot);
            errAbort("deleted tree root %s (component: %s:%d-%d/%c)) has more that one child", stTree_getLabel(srcRoot), srcNcLink->comp->seq->orgSeqName, srcNcLink->comp->start, srcNcLink->comp->end, srcNcLink->comp->strand);
        } else if (stList_length(pendingSubtrees) == 1) {
            destRoot = stList_pop(pendingSubtrees);
        }
    }
    stList_destruct(pendingSubtrees);
    return destRoot;
}
Exemple #14
0
int main(int argc, char *argv[]) {
    /*
     * Arguments/options
     */
    char *logLevelString = NULL;
    char *mfaFile = NULL;
    char *outputFile = NULL;
    char *treeFile = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = {
            { "logLevel", required_argument, 0, 'a' },
            { "mfaFile", required_argument, 0, 'b' },
            { "outputFile", required_argument, 0, 'd' },
            { "treeFile", optional_argument, 0, 't' },
            { "help", no_argument, 0, 'h' },
            { 0, 0, 0, 0 }
        };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:d:t:h", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                break;
            case 'b':
                mfaFile = stString_copy(optarg);
                break;
            case 'd':
                outputFile = stString_copy(optarg);
                break;
            case 't':
                treeFile = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    if (argc == 1) {
        usage();
        exit(1);
    }

    assert(mfaFile != NULL);
    assert(outputFile != NULL);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("MFA file  name : %s\n", mfaFile);
    st_logInfo("Output MAF file : %s\n", outputFile);
    st_logInfo("Tree file name: %s\n", treeFile == NULL ? "null" : treeFile);

    //////////////////////////////////////////////
    //Get the MFA alignment
    //////////////////////////////////////////////

    //get the alignment
    struct List *sequences = constructEmptyList(0, free);
    struct List *seqLengths = constructEmptyList(0, (void (*)(void *))destructInt);
    struct List *fastaNames = constructEmptyList(0, free);
    FILE *fileHandle = fopen(mfaFile, "r");
    if (fileHandle == NULL) {
        usage();
        exit(1);
    }
    fastaRead(fileHandle, sequences, seqLengths, fastaNames);
    fclose(fileHandle);

    //////////////////////////////////////////////
    //Get the tree alignment
    //////////////////////////////////////////////

    stTree *tree = NULL;
    LeafPtrArray *leafArray = NULL;

    int32_t leafCount = 0;
    if (treeFile != NULL) {
        tree = eTreeX_getTreeFromFile(treeFile);

        eTreeX_postOrderTraversal(tree, eTreeX_countLeaves, &leafCount);

        leafArray = eTreeX_constructLeafPtrArray(leafCount);
        eTreeX_postOrderTraversal(tree, eTreeX_getLeafArray, (void *) leafArray);
    }

    //////////////////////////////////////////////
    //Write the MFA alignment.
    //////////////////////////////////////////////

    fileHandle = fopen(outputFile, "w");
    //write the header.
    fprintf(fileHandle, "##maf version=1 scoring=NULL\n");
    fprintf(fileHandle, "# converted_from_MFA\n\n");

    //write the score line
    char *treeString = NULL;
    if (treeFile != NULL) {
        treeString = stTree_getNewickTreeString(tree);
        fprintf(fileHandle, "a score=0 tree=\"%s\"\n", treeString);
    }
    else {
        fprintf(fileHandle, "a score=0\n");
        leafCount = sequences->length;
    }

    //write the alignment
    int32_t i, j;
    int32_t ii;
    const char *label;
    for (ii=0; ii<leafCount; ii++) {
        if (treeFile != NULL) {
            label = stTree_getLabel((stTree *) leafArray->ptrArray[ii]);

            /* Do a brute force search to find the appropriate sequence that matches "label" */
            for (i=0; i<sequences->length; i++) {
                char *fastaHeader = fastaNames->list[i];
                char *sequenceName = st_malloc(sizeof(char) *(1 + strlen(fastaHeader)));
                sscanf(fastaHeader, "%s", sequenceName); //take the sequence name to be the first word of the sequence.
                if (strcmp(label, sequenceName) == 0) {
                    free(sequenceName);
                    break;
                }
                free(sequenceName);
            }
        }
        else {
            i = ii;
        }

        char *sequence = sequences->list[i];
        int32_t seqLength = *((int32_t *)seqLengths->list[i]);
        assert(seqLength == (int32_t)strlen(sequence));
        char *fastaHeader = fastaNames->list[i];
        char *sequenceName = st_malloc(sizeof(char) *(1 + strlen(fastaHeader)));
        sscanf(fastaHeader, "%s", sequenceName); //take the sequence name to be the first word of the sequence.
        int32_t length = 0;
        for (j=0; j<(int32_t)strlen(sequence); j++) {
            if (sequence[j] != '-') {
                length++;
            }
        }
        fprintf(fileHandle, "s\t%s\t%i\t%i\t%s\t%i\t%s\n", sequenceName, 0, length, "+", length, sequence);
        free(sequenceName);
    }

    fclose(fileHandle);

    //////////////////////////////////////////////
    //Clean up.
    //////////////////////////////////////////////

    free(mfaFile);
    free(outputFile);
    free(treeFile);

    if (treeFile != NULL) {
        stTree_destruct(tree);
        free(treeString);
        eTreeX_destructLeafPtrArray(leafArray);
    }

    destructList(sequences);
    destructList(seqLengths);
    destructList(fastaNames);

    return 0;
}