Exemple #1
0
/*
 * Walks the subtree rooted at `tree` (node first, then each child in index
 * order) and reports any node whose branch length is infinite, which this
 * code treats as "not defined".
 *
 * The problem is reported through st_errAbort together with the Newick
 * string of the offending subtree (presumably st_errAbort terminates the
 * program -- confirm against sonLib).
 */
void checkBranchLengthsAreDefined(stTree *tree) {
    int64_t childIndex = 0;

    if (isinf(stTree_getBranchLength(tree))) {
        st_errAbort("Got a non defined branch length in the input tree: %s.\n", stTree_getNewickTreeString(tree));
    }

    /* Recurse into every child of this node. */
    while (childIndex < stTree_getChildNumber(tree)) {
        checkBranchLengthsAreDefined(stTree_getChild(tree, childIndex));
        childIndex++;
    }
}
Exemple #2
0
/*
 * Render the tree wrapped by mTree as a Newick string.
 *
 * Returns the string produced by stTree_getNewickTreeString() for
 * mTree->tree, unchanged (presumably heap-allocated and owned by the
 * caller -- confirm against sonLib).
 */
char *mafTree_format(mafTree *mTree) {
    char *newickString = stTree_getNewickTreeString(mTree->tree);

    return newickString;
}
Exemple #3
0
/*
 * Returns a newly allocated copy of the first whitespace-delimited word of
 * fastaHeader; this word is used as the sequence name. The result is the
 * empty string when the header contains no non-whitespace characters.
 * The caller releases the result with free().
 */
static char *getFirstWordOfHeader(const char *fastaHeader) {
    char *firstWord = st_malloc(sizeof(char) * (1 + strlen(fastaHeader)));
    firstWord[0] = '\0'; //sscanf stores nothing if the header is empty/blank, so pre-terminate.
    sscanf(fastaHeader, "%s", firstWord);
    return firstWord;
}

/*
 * Converts a multi-FASTA alignment (--mfaFile) into a single-block MAF file
 * (--outputFile). When --treeFile is given, the tree is written on the MAF
 * "a" line and the sequences are emitted in the order of the tree's leaves
 * (post-order), matching each leaf label against the first word of the
 * FASTA headers; otherwise sequences are emitted in file order.
 *
 * Returns 0 on success (or after --help), 1 on an unrecognised option.
 * Exits with status 1 when required inputs are missing, a file cannot be
 * opened, or a tree leaf has no matching sequence.
 */
int main(int argc, char *argv[]) {
    /*
     * Arguments/options
     */
    char *logLevelString = NULL;
    char *mfaFile = NULL;
    char *outputFile = NULL;
    char *treeFile = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = {
            { "logLevel", required_argument, 0, 'a' },
            { "mfaFile", required_argument, 0, 'b' },
            { "outputFile", required_argument, 0, 'd' },
            //required_argument to agree with the short form "t:"; with
            //optional_argument "--treeFile path" silently dropped the path.
            { "treeFile", required_argument, 0, 't' },
            { "help", no_argument, 0, 'h' },
            { 0, 0, 0, 0 }
        };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:d:t:h", long_options, &option_index);

        if (key == -1) {
            break;
        }

        //Each case frees any earlier value so a repeated option does not leak.
        switch (key) {
            case 'a':
                free(logLevelString);
                logLevelString = stString_copy(optarg);
                break;
            case 'b':
                free(mfaFile);
                mfaFile = stString_copy(optarg);
                break;
            case 'd':
                free(outputFile);
                outputFile = stString_copy(optarg);
                break;
            case 't':
                free(treeFile);
                treeFile = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    if (argc == 1) {
        usage();
        exit(1);
    }

    //Explicit check rather than assert so it still holds when built with NDEBUG.
    if (mfaFile == NULL || outputFile == NULL) {
        usage();
        exit(1);
    }

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("MFA file  name : %s\n", mfaFile);
    st_logInfo("Output MAF file : %s\n", outputFile);
    st_logInfo("Tree file name: %s\n", treeFile == NULL ? "null" : treeFile);

    //////////////////////////////////////////////
    //Get the MFA alignment
    //////////////////////////////////////////////

    //get the alignment
    struct List *sequences = constructEmptyList(0, free);
    struct List *seqLengths = constructEmptyList(0, (void (*)(void *))destructInt);
    struct List *fastaNames = constructEmptyList(0, free);
    FILE *fileHandle = fopen(mfaFile, "r");
    if (fileHandle == NULL) {
        usage();
        exit(1);
    }
    fastaRead(fileHandle, sequences, seqLengths, fastaNames);
    fclose(fileHandle);

    //////////////////////////////////////////////
    //Get the tree alignment
    //////////////////////////////////////////////

    stTree *tree = NULL;
    LeafPtrArray *leafArray = NULL;

    int32_t leafCount = 0;
    if (treeFile != NULL) {
        tree = eTreeX_getTreeFromFile(treeFile);

        eTreeX_postOrderTraversal(tree, eTreeX_countLeaves, &leafCount);

        leafArray = eTreeX_constructLeafPtrArray(leafCount);
        eTreeX_postOrderTraversal(tree, eTreeX_getLeafArray, (void *) leafArray);
    }

    //////////////////////////////////////////////
    //Write the MFA alignment.
    //////////////////////////////////////////////

    fileHandle = fopen(outputFile, "w");
    if (fileHandle == NULL) {
        fprintf(stderr, "Could not open the output file for writing: %s\n", outputFile);
        exit(1);
    }
    //write the header.
    fprintf(fileHandle, "##maf version=1 scoring=NULL\n");
    fprintf(fileHandle, "# converted_from_MFA\n\n");

    //write the score line
    char *treeString = NULL;
    if (treeFile != NULL) {
        treeString = stTree_getNewickTreeString(tree);
        fprintf(fileHandle, "a score=0 tree=\"%s\"\n", treeString);
    }
    else {
        fprintf(fileHandle, "a score=0\n");
        leafCount = sequences->length;
    }

    //write the alignment
    int32_t i, j;
    int32_t ii;
    const char *label;
    for (ii=0; ii<leafCount; ii++) {
        if (treeFile != NULL) {
            label = stTree_getLabel((stTree *) leafArray->ptrArray[ii]);
            if (label == NULL) {
                fprintf(stderr, "A leaf of the input tree has no label, so it cannot be matched to a sequence\n");
                exit(1);
            }

            /* Do a brute force search to find the appropriate sequence that matches "label" */
            for (i=0; i<sequences->length; i++) {
                char *candidateName = getFirstWordOfHeader(fastaNames->list[i]);
                int isMatch = strcmp(label, candidateName) == 0;
                free(candidateName);
                if (isMatch) {
                    break;
                }
            }
            //Without this check a missing label would index one past the end of the lists below.
            if (i >= sequences->length) {
                fprintf(stderr, "No sequence in the MFA file matches the tree leaf label: %s\n", label);
                exit(1);
            }
        }
        else {
            i = ii;
        }

        char *sequence = sequences->list[i];
        int32_t seqLength = *((int32_t *)seqLengths->list[i]);
        int32_t alignedLength = (int32_t)strlen(sequence); //computed once instead of on every loop test
        assert(seqLength == alignedLength);
        char *sequenceName = getFirstWordOfHeader(fastaNames->list[i]);
        //count the non-gap columns; this is the ungapped length reported in the "s" line.
        int32_t length = 0;
        for (j=0; j<alignedLength; j++) {
            if (sequence[j] != '-') {
                length++;
            }
        }
        fprintf(fileHandle, "s\t%s\t%i\t%i\t%s\t%i\t%s\n", sequenceName, 0, length, "+", length, sequence);
        free(sequenceName);
    }

    fclose(fileHandle);

    //////////////////////////////////////////////
    //Clean up.
    //////////////////////////////////////////////

    //Release the tree resources before treeFile is freed, so the pointer is
    //never inspected after free().
    if (treeFile != NULL) {
        stTree_destruct(tree);
        free(treeString);
        eTreeX_destructLeafPtrArray(leafArray);
    }

    free(logLevelString);
    free(mfaFile);
    free(outputFile);
    free(treeFile);

    destructList(sequences);
    destructList(seqLengths);
    destructList(fastaNames);

    return 0;
}