Esempio n. 1
0
Group *flower_getParentGroup(Flower *flower) {
    if (flower->parentFlowerName == NULL_NAME) {
        return NULL;
    }
    Flower *flower2 = cactusDisk_getFlower(flower_getCactusDisk(flower), flower->parentFlowerName);
    assert(flower2 != NULL);
    return flower_getGroup(flower2, flower_getName(flower));
}
Esempio n. 2
0
void flower_unloadParent(Flower *flower) {
    Name parentName = flower->parentFlowerName;
    if (parentName != NULL_NAME) {
        CactusDisk *cactusDisk = flower_getCactusDisk(flower);
        if (cactusDisk_flowerIsLoaded(cactusDisk, parentName)) {
            Flower *parentFlower = cactusDisk_getFlower(cactusDisk, parentName);
            flower_unload(parentFlower);
        }
    }
}
Esempio n. 3
0
void testCactusDisk_getFlower(CuTest* testCase) {
    cactusDiskTestSetup();
    Flower *flower = flower_construct(cactusDisk);
    Flower *flower2 = flower_construct(cactusDisk);
    CuAssertTrue(testCase, cactusDisk_getFlower(cactusDisk, flower_getName(flower)) == flower);
    CuAssertTrue(testCase, cactusDisk_getFlower(cactusDisk, flower_getName(flower2)) == flower2);
    //now try closing the disk, then reloading it, to see if we get the same result.
    Name name1 = flower_getName(flower);
    Name name2 = flower_getName(flower2);
    cactusDisk_write(cactusDisk);
    cactusDisk_destruct(cactusDisk);
    cactusDisk = cactusDisk_construct(conf, 0);
    flower = cactusDisk_getFlower(cactusDisk, name1);
    flower2 = cactusDisk_getFlower(cactusDisk, name2);
    CuAssertTrue(testCase, flower != NULL);
    CuAssertTrue(testCase, flower2 != NULL);
    CuAssertTrue(testCase, flower_getName(flower) == name1);
    CuAssertTrue(testCase, flower_getName(flower2) == name2);
    cactusDiskTestTeardown();
}
Esempio n. 4
0
int main(int argc, char *argv[])
{
    char *cactusDiskString = NULL;
    stKVDatabaseConf *kvDatabaseConf;
    CactusDisk *cactusDisk;
    Flower *flower;
    Flower_SequenceIterator *flowerIt;
    Sequence *sequence;
    struct option longopts[] = { {"cactusDisk", required_argument, NULL, 'c' },
                                 {0, 0, 0, 0} };
    int flag;
    while((flag = getopt_long(argc, argv, "", longopts, NULL)) != -1) {
        switch(flag) {
        case 'c':
            cactusDiskString = stString_copy(optarg);
            break;
        case '?':
        default:
            usage();
            return 1;
        }
    }
    if (cactusDiskString == NULL) {
        st_errAbort("--cactusDisk option must be provided");
    }
    kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskString);
    cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    // Get top-level flower.
    flower = cactusDisk_getFlower(cactusDisk, 0);
    flowerIt = flower_getSequenceIterator(flower);
    while((sequence = flower_getNextSequence(flowerIt)) != NULL) {
        MetaSequence *metaSequence = sequence_getMetaSequence(sequence);
        const char *header;
        char *firstToken, *newHeader;
        stList *tokens;
        // Strip the ID token from the header (should be the first
        // |-separated token) and complain if there isn't one.
        header = metaSequence_getHeader(metaSequence);
        tokens = fastaDecodeHeader(header);
        assert(stList_length(tokens) > 1);
        firstToken = stList_removeFirst(tokens);
        assert(!strncmp(firstToken, "id=", 3));
        free(firstToken);
        newHeader = fastaEncodeHeader(tokens);
        metaSequence_setHeader(metaSequence, newHeader);
    }
    cactusDisk_write(cactusDisk);
}
Esempio n. 5
0
int main(int argc, char *argv[]) {
    st_setLogLevelFromString(argv[1]);
    st_logDebug("Set up logging\n");

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(argv[2]);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    st_logDebug("Set up the flower disk\n");

    Name flowerName = cactusMisc_stringToName(argv[3]);
    Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName);

    int64_t totalBases = flower_getTotalBaseLength(flower);
    int64_t totalEnds = flower_getEndNumber(flower);
    int64_t totalFreeEnds = flower_getFreeStubEndNumber(flower);
    int64_t totalAttachedEnds = flower_getAttachedStubEndNumber(flower);
    int64_t totalCaps = flower_getCapNumber(flower);
    int64_t totalBlocks = flower_getBlockNumber(flower);
    int64_t totalGroups = flower_getGroupNumber(flower);
    int64_t totalChains = flower_getChainNumber(flower);
    int64_t totalLinkGroups = 0;
    int64_t maxEndDegree = 0;
    int64_t maxAdjacencyLength = 0;
    int64_t totalEdges = 0;

    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while((end = flower_getNextEnd(endIt)) != NULL) {
        assert(end_getOrientation(end));
        if(end_getInstanceNumber(end) > maxEndDegree) {
            maxEndDegree = end_getInstanceNumber(end);
        }
        stSortedSet *ends = stSortedSet_construct();
        End_InstanceIterator *capIt = end_getInstanceIterator(end);
        Cap *cap;
        while((cap = end_getNext(capIt)) != NULL) {
            if(cap_getSequence(cap) != NULL) {
                Cap *adjacentCap = cap_getAdjacency(cap);
                assert(adjacentCap != NULL);
                End *adjacentEnd = end_getPositiveOrientation(cap_getEnd(adjacentCap));
                stSortedSet_insert(ends, adjacentEnd);
                int64_t adjacencyLength = cap_getCoordinate(cap) - cap_getCoordinate(adjacentCap);
                if(adjacencyLength < 0) {
                    adjacencyLength *= -1;
                }
                assert(adjacencyLength >= 1);
                if(adjacencyLength >= maxAdjacencyLength) {
                    maxAdjacencyLength = adjacencyLength;
                }
            }
        }
        end_destructInstanceIterator(capIt);
        totalEdges += stSortedSet_size(ends);
        if(stSortedSet_search(ends, end) != NULL) { //This ensures we count self edges twice, so that the division works.
            totalEdges += 1;
        }
        stSortedSet_destruct(ends);
    }
    assert(totalEdges % 2 == 0);
    flower_destructEndIterator(endIt);

    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while((group = flower_getNextGroup(groupIt)) != NULL) {
        if(group_getLink(group) != NULL) {
            totalLinkGroups++;
        }
    }
    flower_destructGroupIterator(groupIt);

    printf("flower name: %" PRIi64 " total bases: %" PRIi64 " total-ends: %" PRIi64 " total-caps: %" PRIi64 " max-end-degree: %" PRIi64 " max-adjacency-length: %" PRIi64 " total-blocks: %" PRIi64 " total-groups: %" PRIi64 " total-edges: %" PRIi64 " total-free-ends: %" PRIi64 " total-attached-ends: %" PRIi64 " total-chains: %" PRIi64 " total-link groups: %" PRIi64 "\n",
            flower_getName(flower), totalBases, totalEnds, totalCaps, maxEndDegree, maxAdjacencyLength, totalBlocks, totalGroups, totalEdges/2, totalFreeEnds, totalAttachedEnds, totalChains, totalLinkGroups);

    return 0;
}
Esempio n. 6
0
int main(int argc, char *argv[]) {
    /*
     * Script for adding a reference genome to a flower.
     */

    /*
     * Arguments/options
     */
    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    char *referenceEventString =
            (char *) cactusMisc_getDefaultReferenceEventHeader();
    char *outputFile = NULL;
    Name flowerName = NULL_NAME;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = { { "logLevel",
                required_argument, 0, 'a' }, { "cactusDisk", required_argument,
                0, 'c' },  { "flowerName", required_argument,
                        0, 'd' },
                { "referenceEventString", required_argument, 0, 'g' }, {
                        "help", no_argument, 0, 'h' }, { "outputFile",
                        required_argument, 0, 'k' },
                { 0, 0, 0, 0 } };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:c:d:g:hk:", long_options,
                &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                break;
            case 'c':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'd':
                flowerName = cactusMisc_stringToName(optarg);
                break;
            case 'g':
                referenceEventString = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'k':
                outputFile = stString_copy(optarg);
                break;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    assert(cactusDiskDatabaseString != NULL);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(
            cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    st_logInfo("Set up the flower disk\n");


    ///////////////////////////////////////////////////////////////////////////
    // Get the set of flowers to manipulate
    ///////////////////////////////////////////////////////////////////////////

    Flower *flower = cactusDisk_getFlower(cactusDisk, flowerName);

    ///////////////////////////////////////////////////////////////////////////
    // Get the reference event name
    ///////////////////////////////////////////////////////////////////////////

    Event *referenceEvent = eventTree_getEventByHeader(
            flower_getEventTree(flower), referenceEventString);
    assert(referenceEvent != NULL);
    Name referenceEventName = event_getName(referenceEvent);

    ///////////////////////////////////////////////////////////////////////////
    // Now process each flower in turn.
    ///////////////////////////////////////////////////////////////////////////
    
    if(outputFile == NULL) {
        st_errAbort("No output file specified\n");
    }
    FILE *fileHandle = fopen(outputFile, "w");
    printFastaSequences(flower, fileHandle, referenceEventName);
    if(fileHandle != NULL) {
        fclose(fileHandle);
    }

    ///////////////////////////////////////////////////////////////////////////
    //Clean up memory
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_destruct(cactusDisk);

    //return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.

    free(cactusDiskDatabaseString);
    free(referenceEventString);
    free(logLevelString);

    st_logInfo("Cleaned stuff up and am finished\n");
    //while(1);
    return 0;
}
Esempio n. 7
0
int main(int argc, char *argv[]) {
    /*
     * Open the database.
     * Construct a flower.
     * Construct an event tree representing the species tree.
     * For each sequence contruct two ends each containing an cap.
     * Make a file for the sequence.
     * Link the two caps.
     * Finish!
     */

    int64_t key, j;
    Group *group;
    Flower_EndIterator *endIterator;
    End *end;
    bool makeEventHeadersAlphaNumeric = 0;

    /*
     * Arguments/options
     */
    char * logLevelString = NULL;
    char * speciesTree = NULL;
    char * outgroupEvents = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, {
                "speciesTree", required_argument, 0, 'f' }, { "outgroupEvents", required_argument, 0, 'g' },
                { "help", no_argument, 0, 'h' }, { "makeEventHeadersAlphaNumeric", no_argument, 0, 'i' }, { 0, 0, 0, 0 } };

        int option_index = 0;

        key = getopt_long(argc, argv, "a:b:f:hg:i", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = optarg;
                break;
            case 'b':
                cactusDiskDatabaseString = optarg;
                break;
            case 'f':
                speciesTree = optarg;
                break;
            case 'g':
                outgroupEvents = optarg;
                break;
            case 'h':
                usage();
                return 0;
            case 'i':
                makeEventHeadersAlphaNumeric = 1;
                break;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    //assert(logLevelString == NULL || strcmp(logLevelString, "CRITICAL") == 0 || strcmp(logLevelString, "INFO") == 0 || strcmp(logLevelString, "DEBUG") == 0);
    assert(cactusDiskDatabaseString != NULL);
    assert(speciesTree != NULL);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString);

    for (j = optind; j < argc; j++) {
        st_logInfo("Sequence file/directory %s\n", argv[j]);
    }

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    stKVDatabaseConf *kvDatabaseConf = kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    if (stKVDatabaseConf_getType(kvDatabaseConf) == stKVDatabaseTypeTokyoCabinet || stKVDatabaseConf_getType(kvDatabaseConf)
            == stKVDatabaseTypeKyotoTycoon) {
        assert(stKVDatabaseConf_getDir(kvDatabaseConf) != NULL);
        cactusDisk = cactusDisk_construct2(kvDatabaseConf, "cactusSequences");
    } else {
        cactusDisk = cactusDisk_construct(kvDatabaseConf, 1);
    }
    st_logInfo("Set up the flower disk\n");

    //////////////////////////////////////////////
    //Construct the flower
    //////////////////////////////////////////////

    if (cactusDisk_getFlower(cactusDisk, 0) != NULL) {
        cactusDisk_destruct(cactusDisk);
        st_logInfo("The first flower already exists\n");
        return 0;
    }
    flower = flower_construct2(0, cactusDisk);
    assert(flower_getName(flower) == 0);
    st_logInfo("Constructed the flower\n");

    //////////////////////////////////////////////
    //Construct the event tree
    //////////////////////////////////////////////

    st_logInfo("Going to build the event tree with newick string: %s\n", speciesTree);
    stTree *tree = stTree_parseNewickString(speciesTree);
    st_logInfo("Parsed the tree\n");
    if (makeEventHeadersAlphaNumeric) {
        makeEventHeadersAlphaNumericFn(tree);
    }
    stTree_setBranchLength(tree, INT64_MAX);
    checkBranchLengthsAreDefined(tree);
    eventTree = eventTree_construct2(flower); //creates the event tree and the root even
    totalEventNumber = 1;
    st_logInfo("Constructed the basic event tree\n");

    // Construct a set of outgroup names so that ancestral outgroups
    // get recognized.
    stSet *outgroupNameSet = stSet_construct3(stHash_stringKey,
                                              stHash_stringEqualKey,
                                              free);
    if(outgroupEvents != NULL) {
        stList *outgroupNames = stString_split(outgroupEvents);
        for(int64_t i = 0; i < stList_length(outgroupNames); i++) {
            char *outgroupName = stList_get(outgroupNames, i);
            stSet_insert(outgroupNameSet, stString_copy(outgroupName));
        }
        stList_destruct(outgroupNames);
    }

    //now traverse the tree
    j = optind;
    assignEventsAndSequences(eventTree_getRootEvent(eventTree), tree,
                             outgroupNameSet, argv, &j);

    char *eventTreeString = eventTree_makeNewickString(eventTree);
    st_logInfo(
            "Constructed the initial flower with %" PRIi64 " sequences and %" PRIi64 " events with string: %s\n",
            totalSequenceNumber, totalEventNumber, eventTreeString);
    assert(event_getSubTreeBranchLength(eventTree_getRootEvent(eventTree)) >= 0.0);
    free(eventTreeString);
    //assert(0);

    //////////////////////////////////////////////
    //Label any outgroup events.
    //////////////////////////////////////////////

    if (outgroupEvents != NULL) {
        stList *outgroupEventsList = stString_split(outgroupEvents);
        for (int64_t i = 0; i < stList_length(outgroupEventsList); i++) {
            char *outgroupEvent = makeEventHeadersAlphaNumeric ? makeAlphaNumeric(stList_get(outgroupEventsList, i)) : stString_copy(stList_get(outgroupEventsList, i));
            Event *event = eventTree_getEventByHeader(eventTree, outgroupEvent);
            if (event == NULL) {
                st_errAbort("Got an outgroup string that does not match an event, outgroup string %s", outgroupEvent);
            }
            assert(!event_isOutgroup(event));
            event_setOutgroupStatus(event, 1);
            assert(event_isOutgroup(event));
            free(outgroupEvent);
        }
        stList_destruct(outgroupEventsList);
    }

    //////////////////////////////////////////////
    //Construct the terminal group.
    //////////////////////////////////////////////

    if (flower_getEndNumber(flower) > 0) {
        group = group_construct2(flower);
        endIterator = flower_getEndIterator(flower);
        while ((end = flower_getNextEnd(endIterator)) != NULL) {
            end_setGroup(end, group);
        }
        flower_destructEndIterator(endIterator);
        assert(group_isLeaf(group));

        // Create a one link chain if there is only one pair of attached ends..
        group_constructChainForLink(group);
        assert(!flower_builtBlocks(flower));
    } else {
        flower_setBuiltBlocks(flower, 1);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Write the flower to disk.
    ///////////////////////////////////////////////////////////////////////////

    //flower_check(flower);
    cactusDisk_write(cactusDisk);
    st_logInfo("Updated the flower on disk\n");

    ///////////////////////////////////////////////////////////////////////////
    // Cleanup.
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_destruct(cactusDisk);

    return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.

    stSet_destruct(outgroupNameSet);
    stTree_destruct(tree);
    stKVDatabaseConf_destruct(kvDatabaseConf);

    return 0;
}
int main(int argc, char *argv[]) {
    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    char * flowerName = NULL;
    char * outputFile = NULL;
    char *referenceEventString = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = { 
                { "logLevel", required_argument, 0, 'a' }, 
                { "referenceEventString", required_argument, 0, 'b' },
                { "cactusDisk", required_argument, 0, 'c' }, 
		{ "flowerName", required_argument, 0, 'e' },
		{ "outputFile", required_argument, 0, 'f' },
                { "help", no_argument, 0, 'h' }, { 0, 0, 0, 0 } };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:c:d:e:f:h", long_options,
                &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                break;
            case 'b':
                referenceEventString = stString_copy(optarg);
                break;
            case 'c':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'e':
                flowerName = stString_copy(optarg);
                break;
            case 'f':
                outputFile = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    assert(flowerName != NULL);
    assert(referenceEventString != NULL);
    assert(cactusDiskDatabaseString != NULL);
    assert(outputFile != NULL);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("Flower name : %s\n", flowerName);
    st_logInfo("Sequence name : %s\n", referenceEventString);
    st_logInfo("Output file : %s\n", outputFile);

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(
            cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, false, true);
    st_logInfo("Set up the flower disk\n");

    ///////////////////////////////////////////////////////////////////////////
    // Parse the basic reconstruction problem
    ///////////////////////////////////////////////////////////////////////////

    Flower *flower = cactusDisk_getFlower(cactusDisk, cactusMisc_stringToName(
            flowerName));
    st_logInfo("Parsed the top level flower of the cactus tree to check\n");

    ///////////////////////////////////////////////////////////////////////////
    // Recursive check the flowers.
    ///////////////////////////////////////////////////////////////////////////

    //int64_t startTime = time(NULL);
    //flower = flower_addReferenceSequence(flower, cactusDisk, name);
    //st_logInfo("Added the reference sequence in %" PRIi64 " seconds/\n", time(NULL) - startTime);

    int64_t numSequences = flower_getSequenceNumber(flower);
    //Make sure that referenceSequence has already been added:
    if(getSequenceMatchesEvent(flower, referenceEventString) == NULL &&
       numSequences > 0){
        fprintf(stderr, "No reference sequence found in cactusDisk\n");
        exit(EXIT_FAILURE); 
    }
      
    FILE *fileHandle = fopen(outputFile, "w");
    if (numSequences > 0) {
      getReferenceSequences(fileHandle, flower, referenceEventString);
    }
    else {
      st_logCritical("cactus_getReferenceSeq found no reference sequence in empty cactus disk %s",
                     cactusDiskDatabaseString);
    }
    fclose(fileHandle);

    ///////////////////////////////////////////////////////////////////////////
    // Clean up.
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_destruct(cactusDisk);

    return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.

    stKVDatabaseConf_destruct(kvDatabaseConf);

    return 0;
}
int main(int argc, char *argv[]) {
    Flower *flower;
    FILE *fileHandle;

    /*
     * Arguments/options
     */
    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    char * flowerName = NULL;
    char * outputFile = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = { { "logLevel",
                required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0,
                'c' }, { "flowerName", required_argument, 0, 'd' }, {
                "outputFile", required_argument, 0, 'e' }, { "scaleNodeSizes",
                no_argument, 0, 'f' }, { "nameLabels", no_argument, 0, 'g' }, {
                "help", no_argument, 0, 'h' }, { 0, 0, 0, 0 } };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:c:d:e:fgh", long_options,
                &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                break;
            case 'c':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'd':
                flowerName = stString_copy(optarg);
                break;
            case 'e':
                outputFile = stString_copy(optarg);
                break;
            case 'f':
                scaleNodeSizes = !scaleNodeSizes;
                break;
            case 'g':
                nameLabels = !nameLabels;
                break;
            case 'h':
                usage();
                return 0;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    assert(cactusDiskDatabaseString != NULL);
    assert(flowerName != NULL);
    assert(outputFile != NULL);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("Flower name : %s\n", flowerName);
    st_logInfo("Output graph file : %s\n", outputFile);

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    st_logInfo("Set up the flower disk\n");

    ///////////////////////////////////////////////////////////////////////////
    // Parse the basic reconstruction problem
    ///////////////////////////////////////////////////////////////////////////

    flower = cactusDisk_getFlower(cactusDisk, cactusMisc_stringToName(flowerName));
    st_logInfo("Parsed the top level flower of the cactus tree to build\n");

    ///////////////////////////////////////////////////////////////////////////
    // Build the graph.
    ///////////////////////////////////////////////////////////////////////////

    totalProblemSize = flower_getTotalBaseLength(flower);
    fileHandle = fopen(outputFile, "w");
    graphViz_setupGraphFile(fileHandle);
    makeCactusTree_flower(flower, fileHandle, NULL, NULL);
    graphViz_finishGraphFile(fileHandle);
    fclose(fileHandle);
    st_logInfo("Written the tree to file\n");

    ///////////////////////////////////////////////////////////////////////////
    // Clean up.
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_destruct(cactusDisk);
    stKVDatabaseConf_destruct(kvDatabaseConf);

    return 0;
}
//============================== MAIN =========================================
int main(int argc, char *argv[]) {
   Flower *flower;

   /*
    * Arguments/options
    */
   char * st_logLevelString = NULL;
   char * cactusDiskDatabaseString = NULL;
   char * flowerName = "0";
   char * outputFile = NULL;
   char * species = NULL;
   char * geneFile = NULL;

   ///////////////////////////////////////////////////////////////////////////
   // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
   ///////////////////////////////////////////////////////////////////////////

   while(1) {
      static struct option long_options[] = {
         { "genePslFile", required_argument, 0, 'g' },
         { "species", required_argument, 0, 's' },
         { "st_logLevel", required_argument, 0, 'a' },
         { "cactusDisk", required_argument, 0, 'c' },
         { "outputFile", required_argument, 0, 'o' },
         { "help", no_argument, 0, 'h' },
         { 0, 0, 0, 0 }
      };

      int option_index = 0;

      int key = getopt_long(argc, argv, "s:g:o:a:c:h", long_options, &option_index);

      if(key == -1) {
         break;
      }

      switch(key) {
         case 'a':
            st_logLevelString = stString_copy(optarg);
            break;
         case 'c':
            cactusDiskDatabaseString = stString_copy(optarg);
            break;
         case 'o':
            outputFile = stString_copy(optarg);
            break;
         case 's':
            species = stString_copy(optarg);
            break;
         case 'g':
            geneFile = stString_copy(optarg);
            break;
         case 'h':
            usage();
            return 0;
         default:
            usage();
            return 1;
      }
   }

   ///////////////////////////////////////////////////////////////////////////
   // (0) Check the inputs.
   ///////////////////////////////////////////////////////////////////////////

   assert(cactusDiskDatabaseString != NULL);
   assert(outputFile != NULL);
   assert(species != NULL);
   assert(geneFile != NULL);

   //////////////////////////////////////////////
   //Set up st_logging
   //////////////////////////////////////////////

   st_setLogLevelFromString(st_logLevelString);

   //////////////////////////////////////////////
   //Log (some of) the inputs
   //////////////////////////////////////////////

   st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString);
   st_logInfo("Output file : %s\n", outputFile);
   st_logInfo("Species: %s\n", species);
   st_logInfo("GenePslFile: %s\n", geneFile);

   //////////////////////////////////////////////
   //Load the database
   //////////////////////////////////////////////

   stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
   CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
   st_logInfo("Set up the flower disk\n");

   ///////////////////////////////////////////////////////////////////////////
   // Parse the basic reconstruction problem
   ///////////////////////////////////////////////////////////////////////////
   flower = cactusDisk_getFlower(cactusDisk, cactusMisc_stringToName(flowerName));
   st_logInfo("Parsed the top level flower of the cactus tree to check\n");

   ///////////////////////////////////////////////////////////////////////////
   // Recursive check the flowers.
   ///////////////////////////////////////////////////////////////////////////

   int64_t startTime = time(NULL);
   FILE *fileHandle = fopen(outputFile, "w");
   struct bed *gene = bedLoadAll(geneFile);
   mapGenes(flower, fileHandle, gene, species);
   fclose(fileHandle);
   st_logInfo("Map genes in %" PRIi64 " seconds/\n", time(NULL) - startTime);

   ///////////////////////////////////////////////////////////////////////////
   // Clean up.
   ///////////////////////////////////////////////////////////////////////////

   cactusDisk_destruct(cactusDisk);

   return 0;
}
Esempio n. 11
0
void testFlower_getName(CuTest* testCase) {
    cactusFlowerTestSetup();
    CuAssertTrue(testCase, flower_getName(flower) != NULL_NAME);
    CuAssertTrue(testCase, cactusDisk_getFlower(cactusDisk, flower_getName(flower)) == flower);
    cactusFlowerTestTeardown();
}
Esempio n. 12
0
int main(int argc, char *argv[]) {

    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    int64_t i, j;
    int64_t spanningTrees = 10;
    int64_t maximumLength = 1500;
    bool useProgressiveMerging = 0;
    float matchGamma = 0.5;
    bool useBanding = 0;
    int64_t k;
    stList *listOfEndAlignmentFiles = NULL;
    char *endAlignmentsToPrecomputeOutputFile = NULL;
    bool calculateWhichEndsToComputeSeparately = 0;
    int64_t largeEndSize = 1000000;
    int64_t chainLengthForBigFlower = 1000000;
    int64_t longChain = 2;
    char *ingroupCoverageFilePath = NULL;
    int64_t minimumSizeToRescue = 1;
    double minimumCoverageToRescue = 0.0;

    PairwiseAlignmentParameters *pairwiseAlignmentBandingParameters = pairwiseAlignmentBandingParameters_construct();

    /*
     * Setup the input parameters for cactus core.
     */
    bool pruneOutStubAlignments = 0;

    /*
     * Parse the options.
     */
    while (1) {
        static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, {
                "help", no_argument, 0, 'h' }, { "spanningTrees", required_argument, 0, 'i' },
                { "maximumLength", required_argument, 0, 'j' }, { "useBanding", no_argument, 0, 'k' },
                { "gapGamma", required_argument, 0, 'l' }, { "matchGamma", required_argument, 0, 'L' },
                { "splitMatrixBiggerThanThis", required_argument, 0, 'o' }, { "anchorMatrixBiggerThanThis",
                        required_argument, 0, 'p' }, { "repeatMaskMatrixBiggerThanThis", required_argument, 0, 'q' }, {
                        "diagonalExpansion", required_argument, 0, 'r' }, { "constraintDiagonalTrim", required_argument, 0, 't' }, {
                        "minimumDegree", required_argument, 0, 'u' }, { "alignAmbiguityCharacters", no_argument, 0, 'w' }, {
                        "pruneOutStubAlignments", no_argument, 0, 'y' }, {
                        "minimumIngroupDegree", required_argument, 0, 'A' }, { "minimumOutgroupDegree", required_argument, 0, 'B' },
                { "precomputedAlignments", required_argument, 0, 'D' }, {
                        "endAlignmentsToPrecomputeOutputFile", required_argument, 0, 'E' }, { "useProgressiveMerging",
                        no_argument, 0, 'F' }, { "calculateWhichEndsToComputeSeparately", no_argument, 0, 'G' }, { "largeEndSize",
                        required_argument, 0, 'I' },
                        {"ingroupCoverageFile", required_argument, 0, 'J'},
                        {"minimumSizeToRescue", required_argument, 0, 'K'},
                        {"minimumCoverageToRescue", required_argument, 0, 'M'},
                        { "minimumNumberOfSpecies", required_argument, 0, 'N' },
                        { 0, 0, 0, 0 } };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:hi:j:kl:o:p:q:r:t:u:wy:A:B:D:E:FGI:J:K:L:M:N:", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                st_setLogLevelFromString(logLevelString);
                break;
            case 'b':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'i':
                i = sscanf(optarg, "%" PRIi64 "", &spanningTrees);
                (void) i;
                assert(i == 1);
                assert(spanningTrees >= 0);
                break;
            case 'j':
                i = sscanf(optarg, "%" PRIi64 "", &maximumLength);
                assert(i == 1);
                assert(maximumLength >= 0);
                break;
            case 'k':
                useBanding = !useBanding;
                break;
            case 'l':
                i = sscanf(optarg, "%f", &pairwiseAlignmentBandingParameters->gapGamma);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->gapGamma >= 0.0);
                break;
            case 'L':
                i = sscanf(optarg, "%f", &matchGamma);
                assert(i == 1);
                assert(matchGamma >= 0.0);
                break;
            case 'o':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->splitMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'p':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->anchorMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'q':
                i = sscanf(optarg, "%" PRIi64 "", &k);
                assert(i == 1);
                assert(k >= 0);
                pairwiseAlignmentBandingParameters->repeatMaskMatrixBiggerThanThis = (int64_t) k * k;
                break;
            case 'r':
                i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->diagonalExpansion);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->diagonalExpansion >= 0);
                assert(pairwiseAlignmentBandingParameters->diagonalExpansion % 2 == 0);
                break;
            case 't':
                i = sscanf(optarg, "%" PRIi64 "", &pairwiseAlignmentBandingParameters->constraintDiagonalTrim);
                assert(i == 1);
                assert(pairwiseAlignmentBandingParameters->constraintDiagonalTrim >= 0);
                break;
            case 'u':
                i = sscanf(optarg, "%" PRIi64 "", &minimumDegree);
                assert(i == 1);
                break;
            case 'w':
                pairwiseAlignmentBandingParameters->alignAmbiguityCharacters = 1;
                break;
            case 'y':
                pruneOutStubAlignments = 1;
                break;
            case 'A':
                i = sscanf(optarg, "%" PRIi64 "", &minimumIngroupDegree);
                assert(i == 1);
                break;
            case 'B':
                i = sscanf(optarg, "%" PRIi64 "", &minimumOutgroupDegree);
                assert(i == 1);
                break;
            case 'D':
                listOfEndAlignmentFiles = stString_split(optarg);
                break;
            case 'E':
                endAlignmentsToPrecomputeOutputFile = stString_copy(optarg);
                break;
            case 'F':
                useProgressiveMerging = 1;
                break;
            case 'G':
                calculateWhichEndsToComputeSeparately = 1;
                break;
            case 'I':
                i = sscanf(optarg, "%" PRIi64 "", &largeEndSize);
                assert(i == 1);
                break;
            case 'J':
                ingroupCoverageFilePath = stString_copy(optarg);
                break;
            case 'K':
                i = sscanf(optarg, "%" PRIi64, &minimumSizeToRescue);
                assert(i == 1);
                break;
            case 'M':
                i = sscanf(optarg, "%lf", &minimumCoverageToRescue);
                assert(i == 1);
                break;
            case 'N':
                i = sscanf(optarg, "%" PRIi64, &minimumNumberOfSpecies);
                if (i != 1) {
                    st_errAbort("Error parsing minimumNumberOfSpecies parameter");
                }
                break;
            default:
                usage();
                return 1;
        }
    }

    st_setLogLevelFromString(logLevelString);

    /*
     * Load the flowerdisk
     */
    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); //We precache the sequences
    st_logInfo("Set up the flower disk\n");

    /*
     * Load the hmm
     */
    StateMachine *sM = stateMachine5_construct(fiveState);

    /*
     * For each flower.
     */
    if (calculateWhichEndsToComputeSeparately) {
        stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
        if (stList_length(flowers) != 1) {
            st_errAbort("We are breaking up a flower's end alignments for precomputation but we have %" PRIi64 " flowers.\n", stList_length(flowers));
        }
        stSortedSet *endsToAlignSeparately = getEndsToAlignSeparately(stList_get(flowers, 0), maximumLength, largeEndSize);
        assert(stSortedSet_size(endsToAlignSeparately) != 1);
        stSortedSetIterator *it = stSortedSet_getIterator(endsToAlignSeparately);
        End *end;
        while ((end = stSortedSet_getNext(it)) != NULL) {
            fprintf(stdout, "%s\t%" PRIi64 "\t%" PRIi64 "\n", cactusMisc_nameToStringStatic(end_getName(end)), end_getInstanceNumber(end), getTotalAdjacencyLength(end));
        }
        return 0; //avoid cleanup costs
        stSortedSet_destructIterator(it);
        stSortedSet_destruct(endsToAlignSeparately);
    } else if (endAlignmentsToPrecomputeOutputFile != NULL) {
        /*
         * In this case we will align a set of end and save the alignments in a file.
         */
        stList *names = flowerWriter_parseNames(stdin);
        Flower *flower = cactusDisk_getFlower(cactusDisk, *((Name *)stList_get(names, 0)));
        FILE *fileHandle = fopen(endAlignmentsToPrecomputeOutputFile, "w");
        for(int64_t i=1; i<stList_length(names); i++) {
            End *end = flower_getEnd(flower, *((Name *)stList_get(names, i)));
            if (end == NULL) {
                st_errAbort("The end %" PRIi64 " was not found in the flower\n", *((Name *)stList_get(names, i)));
            }
            stSortedSet *endAlignment = makeEndAlignment(sM, end, spanningTrees, maximumLength, useProgressiveMerging,
                            matchGamma, pairwiseAlignmentBandingParameters);
            writeEndAlignmentToDisk(end, endAlignment, fileHandle);
            stSortedSet_destruct(endAlignment);
        }
        fclose(fileHandle);
        return 0; //avoid cleanup costs
        stList_destruct(names);
        st_logInfo("Finished precomputing end alignments\n");
    } else {
        /*
         * Compute complete flower alignments, possibly loading some precomputed alignments.
         */
        bedRegion *bedRegions = NULL;
        size_t numBeds = 0;
        if (ingroupCoverageFilePath != NULL) {
            // Pre-load the mmap for the coverage file.
            FILE *coverageFile = fopen(ingroupCoverageFilePath, "rb");
            if (coverageFile == NULL) {
                st_errnoAbort("Opening coverage file %s failed",
                              ingroupCoverageFilePath);
            }
            fseek(coverageFile, 0, SEEK_END);
            int64_t coverageFileLen = ftell(coverageFile);
            assert(coverageFileLen >= 0);
            assert(coverageFileLen % sizeof(bedRegion) == 0);
            if (coverageFileLen == 0) {
                // mmap doesn't like length-0 mappings, for obvious
                // reasons. Pretend that the coverage file doesn't
                // exist in this case, since it contains no data.
                ingroupCoverageFilePath = NULL;
            } else {
                // Establish a memory mapping for the file.
                bedRegions = mmap(NULL, coverageFileLen, PROT_READ, MAP_SHARED,
                                  fileno(coverageFile), 0);
                if (bedRegions == MAP_FAILED) {
                    st_errnoAbort("Failure mapping coverage file");
                }

                numBeds = coverageFileLen / sizeof(bedRegion);
            }
            fclose(coverageFile);
        }

        stList *flowers = flowerWriter_parseFlowersFromStdin(cactusDisk);
        if (listOfEndAlignmentFiles != NULL && stList_length(flowers) != 1) {
            st_errAbort("We have precomputed alignments but %" PRIi64 " flowers to align.\n", stList_length(flowers));
        }
        cactusDisk_preCacheStrings(cactusDisk, flowers);
        for (j = 0; j < stList_length(flowers); j++) {
            flower = stList_get(flowers, j);
            st_logInfo("Processing a flower\n");

            stSortedSet *alignedPairs = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees, maximumLength,
                    useProgressiveMerging, matchGamma, pairwiseAlignmentBandingParameters, pruneOutStubAlignments);
            st_logInfo("Created the alignment: %" PRIi64 " pairs\n", stSortedSet_size(alignedPairs));
            stPinchIterator *pinchIterator = stPinchIterator_constructFromAlignedPairs(alignedPairs, getNextAlignedPairAlignment);

            /*
             * Run the cactus caf functions to build cactus.
             */
            stPinchThreadSet *threadSet = stCaf_setup(flower);
            stCaf_anneal(threadSet, pinchIterator, NULL);
            if (minimumDegree < 2) {
                stCaf_makeDegreeOneBlocks(threadSet);
            }
            if (minimumIngroupDegree > 0 || minimumOutgroupDegree > 0 || minimumDegree > 1) {
                stCaf_melt(flower, threadSet, blockFilterFn, 0, 0, 0, INT64_MAX);
            }

            if (ingroupCoverageFilePath != NULL) {
                // Rescue any sequence that is covered by outgroups
                // but currently unaligned into single-degree blocks.
                stPinchThreadSetIt pinchIt = stPinchThreadSet_getIt(threadSet);
                stPinchThread *thread;
                while ((thread = stPinchThreadSetIt_getNext(&pinchIt)) != NULL) {
                    Cap *cap = flower_getCap(flower,
                                             stPinchThread_getName(thread));
                    assert(cap != NULL);
                    Sequence *sequence = cap_getSequence(cap);
                    assert(sequence != NULL);
                    rescueCoveredRegions(thread, bedRegions, numBeds,
                                         sequence_getName(sequence),
                                         minimumSizeToRescue,
                                         minimumCoverageToRescue);
                }
                stCaf_joinTrivialBoundaries(threadSet);
            }

            stCaf_finish(flower, threadSet, chainLengthForBigFlower, longChain, INT64_MAX, INT64_MAX); //Flower now destroyed.
            stPinchThreadSet_destruct(threadSet);
            st_logInfo("Ran the cactus core script.\n");

            /*
             * Cleanup
             */
            //Clean up the sorted set after cleaning up the iterator
            stPinchIterator_destruct(pinchIterator);
            stSortedSet_destruct(alignedPairs);

            st_logInfo("Finished filling in the alignments for the flower\n");
        }
        stList_destruct(flowers);
        //st_errAbort("Done\n");
        /*
         * Write and close the cactusdisk.
         */
        cactusDisk_write(cactusDisk);
        return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.
        if (bedRegions != NULL) {
            // Clean up our mapping.
            munmap(bedRegions, numBeds * sizeof(bedRegion));
        }
    }


    ///////////////////////////////////////////////////////////////////////////
    // Cleanup
    ///////////////////////////////////////////////////////////////////////////

    stateMachine_destruct(sM);
    cactusDisk_destruct(cactusDisk);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    //destructCactusCoreInputParameters(cCIP);
    free(cactusDiskDatabaseString);
    if (listOfEndAlignmentFiles != NULL) {
        stList_destruct(listOfEndAlignmentFiles);
    }
    if (logLevelString != NULL) {
        free(logLevelString);
    }
    st_logInfo("Finished with the flower disk for this flower.\n");

    //while(1);

    return 0;
}