Exemplo n.º 1
0
void cactusDisk_write(CactusDisk *cactusDisk) {
    Flower *flower;
    int64_t recordSize;

    stList *removeRequests = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct);

    st_logDebug("Starting to write the cactus to disk\n");

    stSortedSetIterator *it = stSortedSet_getIterator(cactusDisk->flowers);
    //Sort flowers to update.
    while ((flower = stSortedSet_getNext(it)) != NULL) {
        cactusDisk_addUpdateRequest(cactusDisk, flower);
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Got the flowers to update\n");

    //Remove nets that are marked for deletion..
    it = stSortedSet_getIterator(cactusDisk->flowerNamesMarkedForDeletion);
    char *nameString;
    while ((nameString = stSortedSet_getNext(it)) != NULL) {
        Name name = cactusMisc_stringToName(nameString);
        if (containsRecord(cactusDisk, name)) {
            stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructUpdateRequest(name, &name, 0)); //We set it to null in the first atomic operation.
            stList_append(removeRequests, stIntTuple_construct1(name));
        }
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Avoided updating nets marked for deletion\n");

    // Insert and/or update meta-sequences.
    it = stSortedSet_getIterator(cactusDisk->metaSequences);
    MetaSequence *metaSequence;
    while ((metaSequence = stSortedSet_getNext(it)) != NULL) {
        void *vA =
                binaryRepresentation_makeBinaryRepresentation(metaSequence,
                        (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) metaSequence_writeBinaryRepresentation,
                        &recordSize);
        //Compression
        vA = compress(vA, &recordSize);
        if (!containsRecord(cactusDisk, metaSequence_getName(metaSequence))) {
            stList_append(cactusDisk->updateRequests,
                    stKVDatabaseBulkRequest_constructInsertRequest(metaSequence_getName(metaSequence), vA, recordSize));
        } else {
            stList_append(cactusDisk->updateRequests,
                    stKVDatabaseBulkRequest_constructUpdateRequest(metaSequence_getName(metaSequence), vA, recordSize));
        }
        free(vA);
    }
    stSortedSet_destructIterator(it);

    st_logDebug("Got the sequences we are going to add to the database.\n");

    if (!containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) { //We only write the parameters once.
        //Finally the database info.
        void *cactusDiskParameters =
                binaryRepresentation_makeBinaryRepresentation(cactusDisk,
                        (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) cactusDisk_writeBinaryRepresentation,
                        &recordSize);
        //Compression
        cactusDiskParameters = compress(cactusDiskParameters, &recordSize);
        stList_append(cactusDisk->updateRequests,
                stKVDatabaseBulkRequest_constructInsertRequest(CACTUS_DISK_PARAMETER_KEY, cactusDiskParameters,
                        recordSize));
        free(cactusDiskParameters);
    }

    st_logDebug("Checked if need to write the initial parameters\n");

    if (stList_length(cactusDisk->updateRequests) > 0) {
        st_logDebug("Going to write %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests));
        stTry
            {
                st_logDebug("Writing %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests));
                assert(stList_length(cactusDisk->updateRequests) > 0);
                stKVDatabase_bulkSetRecords(cactusDisk->database, cactusDisk->updateRequests);
            }
            stCatch(except)
                {
                    stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                            "Failed when trying to set records in updating the cactus disk");
                }stTryEnd
        ;
    }
int main(int argc, char *argv[]) {
    /*
     * Script for adding a reference genome to a flower.
     */

    /*
     * Arguments/options
     */
    char * logLevelString = NULL;
    char * cactusDiskDatabaseString = NULL;
    char * secondaryDatabaseString = NULL;
    char *referenceEventString = (char *) cactusMisc_getDefaultReferenceEventHeader();
    bool bottomUpPhase = 0;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = { { "logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'b' }, { "secondaryDisk", required_argument, 0, 'd' }, { "referenceEventString", required_argument, 0, 'g' }, { "help", no_argument,
                0, 'h' }, { "bottomUpPhase", no_argument, 0, 'j' }, { 0, 0, 0, 0 } };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:c:d:e:g:hi:j", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                break;
            case 'b':
                cactusDiskDatabaseString = stString_copy(optarg);
                break;
            case 'd':
                secondaryDatabaseString = stString_copy(optarg);
                break;
            case 'g':
                referenceEventString = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            case 'j':
                bottomUpPhase = 1;
                break;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    assert(cactusDiskDatabaseString != NULL);

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Load the database
    //////////////////////////////////////////////

    st_logInfo("referenceEventString = %s\n", referenceEventString);
    st_logInfo("bottomUpPhase = %i\n", bottomUpPhase);

    stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
    CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, false, true);
    stKVDatabaseConf_destruct(kvDatabaseConf);
    st_logInfo("Set up the flower disk\n");

    stKVDatabase *sequenceDatabase = NULL;
    if (secondaryDatabaseString != NULL) {
        kvDatabaseConf = stKVDatabaseConf_constructFromString(secondaryDatabaseString);
        sequenceDatabase = stKVDatabase_construct(kvDatabaseConf, 0);
        stKVDatabaseConf_destruct(kvDatabaseConf);
    }

    FlowerStream *flowerStream = flowerWriter_getFlowerStream(cactusDisk, stdin);
    Flower *flower;
    while ((flower = flowerStream_getNext(flowerStream)) != NULL) {
        st_logDebug("Processing flower %" PRIi64 "\n", flower_getName(flower));

        ///////////////////////////////////////////////////////////////////////////
        // Get the appropriate event names
        ///////////////////////////////////////////////////////////////////////////

        st_logInfo("%s\n", eventTree_makeNewickString(flower_getEventTree(flower)));
        Event *referenceEvent = eventTree_getEventByHeader(flower_getEventTree(flower), referenceEventString);
        if (referenceEvent == NULL) {
            st_errAbort("Reference event %s not found in tree. Check your "
                        "--referenceEventString option", referenceEventString);
        }
        Name referenceEventName = event_getName(referenceEvent);

        ///////////////////////////////////////////////////////////////////////////
        // Now do bottom up or top down, depending
        ///////////////////////////////////////////////////////////////////////////
        stList *flowers = stList_construct();
        stList_append(flowers, flower);
        preCacheNestedFlowers(cactusDisk, flowers);
        if (bottomUpPhase) {
            assert(sequenceDatabase != NULL);

            cactusDisk_preCacheSegmentStrings(cactusDisk, flowers);
            bottomUp(flowers, sequenceDatabase, referenceEventName, !flower_hasParentGroup(flower), generateJukesCantorMatrix);

            // Unload the nested flowers to save memory. They haven't
            // been changed, so we don't write them to the cactus
            // disk.
            Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
            Group *group;
            while ((group = flower_getNextGroup(groupIt)) != NULL) {
                if (!group_isLeaf(group)) {
                    flower_unload(group_getNestedFlower(group));
                }
            }
            flower_destructGroupIterator(groupIt);
            assert(!flower_isParentLoaded(flower));

            // Write this flower to disk.
            cactusDisk_addUpdateRequest(cactusDisk, flower);
        } else {
            topDown(flower, referenceEventName);

            // We've changed the nested flowers, but not this
            // flower. We write the nested flowers to disk, then
            // unload them to save memory. This flower will be
            // unloaded by the flower-stream code.
            Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
            Group *group;
            while ((group = flower_getNextGroup(groupIt)) != NULL) {
                if (!group_isLeaf(group)) {
                    cactusDisk_addUpdateRequest(cactusDisk, group_getNestedFlower(group));
                    flower_unload(group_getNestedFlower(group));
                }
            }
            flower_destructGroupIterator(groupIt);
        }
        stList_destruct(flowers);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Write the flower(s) back to disk.
    ///////////////////////////////////////////////////////////////////////////

    cactusDisk_write(cactusDisk);
    st_logInfo("Updated the flower on disk\n");

    ///////////////////////////////////////////////////////////////////////////
    //Clean up.
    ///////////////////////////////////////////////////////////////////////////

    if (sequenceDatabase != NULL) {
        stKVDatabase_destruct(sequenceDatabase);
    }

    cactusDisk_destruct(cactusDisk);

    return 0; //Exit without clean up is quicker, enable cleanup when doing memory leak detection.

    free(cactusDiskDatabaseString);
    free(referenceEventString);
    free(logLevelString);

    st_logInfo("Cleaned stuff up and am finished\n");

    return 0;
}