Ejemplo n.º 1
0
/*
 * A thread is trivial if all the segments it contains come from blocks containing only a reference segment.
 * These reference only segments represent scaffold gaps. At the same time, it processes the thread string
 * to remove the boolean values use to indicate if a thread is trivial or not.
 */
static bool isTrivialString(char **threadString) {
    stList *strings = stString_split(*threadString); //Split splits into individual segments.
    bool trivialString = 1;
    for(int64_t i=0; i<stList_length(strings); i++) {
        char *segmentString = stList_get(strings, i);
        int64_t j = strlen(segmentString)-1; //Location of the boolean value within a segment.
        assert(j > 0);
        assert(segmentString[j] == '0' || segmentString[j] == '1');
        if(segmentString[j] == '1') { //Found a non-trivial segment, hence the thread is non-trivial.
            trivialString = 0;
        }
        segmentString[j] = '\0';
    }
    free(*threadString); //Free old thread string. Doing it this way is a bit more memory efficient, as we don't keep two copies of the string around.
    *threadString = stString_join2("", strings); //Concatenation makes one sequence, now without the booleans.
    stList_destruct(strings);
    return trivialString;
}
Ejemplo n.º 2
0
static void cacheSubstringsFromDB(CactusDisk *cactusDisk, stList *substrings) {
    /*
     * Caches the given set of substrings in the cactusDisk cache.
     */
    if (cactusDisk->storeSequencesInAFile) {
        if (cactusDisk->sequencesReadFileHandle == NULL) {
            if(cactusDisk->sequencesWriteFileHandle != NULL) {
                fsync(fileno(cactusDisk->sequencesWriteFileHandle));
                fclose(cactusDisk->sequencesWriteFileHandle);
                cactusDisk->sequencesWriteFileHandle = NULL;
            }
            cactusDisk->sequencesReadFileHandle = fopen(cactusDisk->absSequencesFileName, "r");
            assert(cactusDisk->sequencesReadFileHandle != NULL);
        }
        else {
            assert(cactusDisk->sequencesWriteFileHandle == NULL);
        }
        for (int64_t i = 0; i < stList_length(substrings); i++) {
            Substring *substring = stList_get(substrings, i);
            char *string = getStringFromDisk(cactusDisk->sequencesReadFileHandle, substring->name, substring->start,
                    substring->length);
            stCache_setRecord(cactusDisk->stringCache, substring->name, substring->start, substring->length, string);
#ifndef NDEBUG
            int64_t bytesRead;
            char *string2 = stCache_getRecord(cactusDisk->stringCache, substring->name, substring->start,
                    substring->length, &bytesRead);
            assert(bytesRead == substring->length);
            for (int64_t j = 0; j < substring->length; j++) {
                assert(string2[j] == string[j]);
            }
            free(string2);
#endif
            free(string);
        }
    } else {
        stList *getRequests = stList_construct3(0, free);
        for (int64_t i = 0; i < stList_length(substrings); i++) {
            Substring *substring = stList_get(substrings, i);
            int64_t intervalSize = (substring->length + substring->start - 1) / CACTUS_DISK_SEQUENCE_CHUNK_SIZE
                    - substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1;
            Name shiftedName = substring->name + substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE;
            for (int64_t j = 0; j < intervalSize; j++) {
                int64_t *k = st_malloc(sizeof(int64_t));
                k[0] = shiftedName + j;
                stList_append(getRequests, k);
            }
        }
        if (stList_length(getRequests) == 0) {
            stList_destruct(getRequests);
            return;
        }
        stList *records = NULL;
        stTry
            {
                records = stKVDatabase_bulkGetRecords(cactusDisk->database, getRequests);
            }
            stCatch(except)
                {
                    stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                            "An unknown database error occurred when getting a sequence string");
                }stTryEnd
        ;
        assert(records != NULL);
        assert(stList_length(records) == stList_length(getRequests));
        stList_destruct(getRequests);
        stListIterator *recordsIt = stList_getIterator(records);
        for (int64_t i = 0; i < stList_length(substrings); i++) {
            Substring *substring = stList_get(substrings, i);
            int64_t intervalSize = (substring->length + substring->start - 1) / CACTUS_DISK_SEQUENCE_CHUNK_SIZE
                    - substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1;
            stList *strings = stList_construct();
            while (intervalSize-- > 0) {
                int64_t recordSize;
                stKVDatabaseBulkResult *result = stList_getNext(recordsIt);
                assert(result != NULL);
                char *string = stKVDatabaseBulkResult_getRecord(result, &recordSize);
                assert(string != NULL);
                assert(strlen(string) == recordSize - 1);
                stList_append(strings, string);
                assert(recordSize <= CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1);
            }
            assert(stList_length(strings) > 0);
            char *joinedString = stString_join2("", strings);
            stCache_setRecord(cactusDisk->stringCache, substring->name,
                    (substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE) * CACTUS_DISK_SEQUENCE_CHUNK_SIZE,
                    strlen(joinedString), joinedString);
            free(joinedString);
            stList_destruct(strings);
        }
        assert(stList_getNext(recordsIt) == NULL);
        stList_destructIterator(recordsIt);
        stList_destruct(records);
    }
}