/*
 * Decides whether a thread is trivial and strips the per-segment flags from its string.
 *
 * The thread string is split into segments; the last character of every segment is a
 * flag, '0' or '1'. A '1' marks a non-trivial segment, and a single such segment makes
 * the whole thread non-trivial. (Trivial segments come from blocks holding only a
 * reference segment, i.e. scaffold gaps.)
 *
 * As a side effect *threadString is freed and replaced by a newly allocated string made
 * of the segments, flags removed, concatenated with no separator. Replacing it in place
 * avoids keeping two copies of the sequence alive.
 *
 * Returns true if no segment carried a '1' flag, false otherwise.
 */
static bool isTrivialString(char **threadString) {
    stList *segments = stString_split(*threadString); // One list entry per segment.
    const int64_t segmentCount = stList_length(segments);
    bool isTrivial = true;

    int64_t index = 0;
    while (index < segmentCount) {
        char *segment = stList_get(segments, index);
        // The flag is the final character of the segment.
        int64_t flagOffset = (int64_t) strlen(segment) - 1;
        assert(flagOffset > 0);
        char flag = segment[flagOffset];
        assert(flag == '0' || flag == '1');
        if (flag == '1') {
            // One non-trivial segment is enough to make the thread non-trivial.
            isTrivial = false;
        }
        segment[flagOffset] = '\0'; // Chop the flag off the segment.
        index++;
    }

    // Swap the old string for the flag-free concatenation of the segments.
    free(*threadString);
    *threadString = stString_join2("", segments);
    stList_destruct(segments);

    return isTrivial;
}
static void cacheSubstringsFromDB(CactusDisk *cactusDisk, stList *substrings) { /* * Caches the given set of substrings in the cactusDisk cache. */ if (cactusDisk->storeSequencesInAFile) { if (cactusDisk->sequencesReadFileHandle == NULL) { if(cactusDisk->sequencesWriteFileHandle != NULL) { fsync(fileno(cactusDisk->sequencesWriteFileHandle)); fclose(cactusDisk->sequencesWriteFileHandle); cactusDisk->sequencesWriteFileHandle = NULL; } cactusDisk->sequencesReadFileHandle = fopen(cactusDisk->absSequencesFileName, "r"); assert(cactusDisk->sequencesReadFileHandle != NULL); } else { assert(cactusDisk->sequencesWriteFileHandle == NULL); } for (int64_t i = 0; i < stList_length(substrings); i++) { Substring *substring = stList_get(substrings, i); char *string = getStringFromDisk(cactusDisk->sequencesReadFileHandle, substring->name, substring->start, substring->length); stCache_setRecord(cactusDisk->stringCache, substring->name, substring->start, substring->length, string); #ifndef NDEBUG int64_t bytesRead; char *string2 = stCache_getRecord(cactusDisk->stringCache, substring->name, substring->start, substring->length, &bytesRead); assert(bytesRead == substring->length); for (int64_t j = 0; j < substring->length; j++) { assert(string2[j] == string[j]); } free(string2); #endif free(string); } } else { stList *getRequests = stList_construct3(0, free); for (int64_t i = 0; i < stList_length(substrings); i++) { Substring *substring = stList_get(substrings, i); int64_t intervalSize = (substring->length + substring->start - 1) / CACTUS_DISK_SEQUENCE_CHUNK_SIZE - substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1; Name shiftedName = substring->name + substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE; for (int64_t j = 0; j < intervalSize; j++) { int64_t *k = st_malloc(sizeof(int64_t)); k[0] = shiftedName + j; stList_append(getRequests, k); } } if (stList_length(getRequests) == 0) { stList_destruct(getRequests); return; } stList *records = NULL; stTry { records = 
stKVDatabase_bulkGetRecords(cactusDisk->database, getRequests); } stCatch(except) { stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID, "An unknown database error occurred when getting a sequence string"); }stTryEnd ; assert(records != NULL); assert(stList_length(records) == stList_length(getRequests)); stList_destruct(getRequests); stListIterator *recordsIt = stList_getIterator(records); for (int64_t i = 0; i < stList_length(substrings); i++) { Substring *substring = stList_get(substrings, i); int64_t intervalSize = (substring->length + substring->start - 1) / CACTUS_DISK_SEQUENCE_CHUNK_SIZE - substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1; stList *strings = stList_construct(); while (intervalSize-- > 0) { int64_t recordSize; stKVDatabaseBulkResult *result = stList_getNext(recordsIt); assert(result != NULL); char *string = stKVDatabaseBulkResult_getRecord(result, &recordSize); assert(string != NULL); assert(strlen(string) == recordSize - 1); stList_append(strings, string); assert(recordSize <= CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1); } assert(stList_length(strings) > 0); char *joinedString = stString_join2("", strings); stCache_setRecord(cactusDisk->stringCache, substring->name, (substring->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE) * CACTUS_DISK_SEQUENCE_CHUNK_SIZE, strlen(joinedString), joinedString); free(joinedString); stList_destruct(strings); } assert(stList_getNext(recordsIt) == NULL); stList_destructIterator(recordsIt); stList_destruct(records); } }