// TODO: see if we can make this one command static void bulkSetRecords(stKVDatabase *database, stList *records) { startTransaction(database); stTry { for(int32_t i=0; i<stList_length(records); i++) { stKVDatabaseBulkRequest *request = stList_get(records, i); switch(request->type) { case UPDATE: updateRecord(database, request->key, request->value, request->size); break; case INSERT: insertRecord(database, request->key, request->value, request->size); break; case SET: setRecord(database, request->key, request->value, request->size); break; } } commitTransaction(database); }stCatch(ex) { abortTransaction(database); stThrowNewCause( ex, ST_KV_DATABASE_EXCEPTION_ID, "MySQL bulk set records failed"); }stTryEnd; }
/*
 * Looks up the record stored under `objectName` and returns the result of
 * decompressing it.
 *
 * The cache is consulted first; on a miss the record is read from the
 * database and, if found, a copy of the still-compressed bytes is placed in
 * the cache. Returns NULL when the database lookup yields NULL.
 *
 * `type` is only used to build the message of the exception raised when the
 * database lookup throws.
 */
static void *getRecord(CactusDisk *cactusDisk, Name objectName, char *type) {
    int64_t recordSize = 0;
    void *compressed = NULL;

    if (!stCache_containsRecord(cactusDisk->cache, objectName, 0, INT64_MAX)) {
        stTry {
            compressed = stKVDatabase_getRecord2(cactusDisk->database, objectName, &recordSize);
        } stCatch(except) {
            stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                    "An unknown database error occurred when getting a %s", type);
        } stTryEnd;
        if (compressed == NULL) {
            return NULL;
        }
        // The cache holds the record in its compressed form.
        stCache_setRecord(cactusDisk->cache, objectName, 0, recordSize, compressed);
    } else {
        // Already cached: use the cached bytes and leave the cache untouched.
        compressed = stCache_getRecord(cactusDisk->cache, objectName, 0, INT64_MAX, &recordSize);
    }

    // Decompress, then release the compressed buffer obtained above.
    void *decompressed = decompress(compressed, &recordSize);
    free(compressed);
    return decompressed;
}
/*
 * Builds a database configuration from `xmlString` by delegating to
 * constructFromString. Any exception raised by that call is re-thrown as an
 * ST_KV_DATABASE_EXCEPTION_ID exception whose message includes the
 * offending XML text.
 */
stKVDatabaseConf *stKVDatabaseConf_constructFromString(const char *xmlString) {
    stKVDatabaseConf *parsedConf = NULL;
    stTry {
        parsedConf = constructFromString(xmlString);
    } stCatch(cause) {
        stThrowNewCause(cause, ST_KV_DATABASE_EXCEPTION_ID,
                "Invalid database XML specification: %s", xmlString);
    } stTryEnd;
    return parsedConf;
}
//TODO: make one command static void bulkRemoveRecords(stKVDatabase *database, stList *records) { startTransaction(database); stTry { for(int32_t i=0; i<stList_length(records); i++) { stInt64Tuple *j = stList_get(records, i); removeRecord(database, stInt64Tuple_getPosition(j, 0)); } commitTransaction(database); }stCatch(ex) { abortTransaction(database); stThrowNewCause( ex, ST_KV_DATABASE_EXCEPTION_ID, "MySQL bulk remove records failed"); }stTryEnd; }
/*
 * Fetches the `numRecords` consecutive records with keys
 * firstKey, firstKey + 1, ..., inside a single transaction.
 *
 * Returns a list of length `numRecords` whose i-th element is a
 * stKVDatabaseBulkResult wrapping whatever getRecord2 returned for key
 * firstKey + i. The list is constructed with
 * stKVDatabaseBulkResult_destruct as its element destructor.
 *
 * If a lookup or the commit throws, the transaction is aborted and the
 * exception is re-thrown wrapped in an ST_KV_DATABASE_EXCEPTION_ID
 * exception.
 *
 * NOTE(review): `results` is not released on the exception path -- confirm
 * whether that leak matters to callers.
 */
static stList *bulkGetRecordsRange(stKVDatabase *database, int64_t firstKey, int64_t numRecords) {
    stList *results = stList_construct3(numRecords, (void (*)(void *)) stKVDatabaseBulkResult_destruct);
    startTransaction(database);
    stTry {
        // The index has the same width as numRecords: a narrower counter
        // would overflow before reaching a bound above INT32_MAX.
        for (int64_t i = 0; i < numRecords; ++i) {
            int64_t key = firstKey + i;
            int64_t recordSize = 0;
            void *record = getRecord2(database, key, &recordSize);
            stKVDatabaseBulkResult *result = stKVDatabaseBulkResult_construct(record, recordSize);
            stList_set(results, i, result);
        }
        commitTransaction(database);
    } stCatch(ex) {
        abortTransaction(database);
        stThrowNewCause(ex, ST_KV_DATABASE_EXCEPTION_ID, "tokyo cabinet bulk get records failed");
    } stTryEnd;
    return results;
}
/*
 * Adds `incrementAmount` to the int64_t stored at the start of the record
 * under `key`, writes the record back, and returns the new value. The read
 * and the write happen inside one transaction.
 *
 * If the lookup, the update or the commit throws, the transaction is
 * aborted and the exception is re-thrown wrapped in an
 * ST_KV_DATABASE_EXCEPTION_ID exception.
 */
static int64_t incrementInt64(stKVDatabase *database, int64_t key, int64_t incrementAmount) {
    startTransaction(database);
    int64_t returnValue = INT64_MIN;
    stTry {
        int64_t recordSize = 0;
        int64_t *record = getRecord2(database, key, &recordSize);
        // Fail loudly (in debug builds) instead of dereferencing NULL.
        assert(record != NULL);
        // Compare as signed values: against an unsigned sizeof a negative
        // recordSize would be converted to a huge value and pass the check.
        assert(recordSize >= (int64_t) sizeof(int64_t));
        record[0] += incrementAmount;
        returnValue = record[0];
        updateRecord(database, key, record, recordSize);
        free(record);
        commitTransaction(database);
    } stCatch(ex) {
        abortTransaction(database);
        stThrowNewCause(ex, ST_KV_DATABASE_EXCEPTION_ID, "MySQL increment record failed");
    } stTryEnd;
    return returnValue;
}
/*
 * Bulk version of getRecord: returns a list whose i-th element is the
 * decompressed record for the i-th name in `objectNames` (each element of
 * `objectNames` is read as a pointer to an int64_t name). The returned list
 * uses free() as its element destructor. An empty `objectNames` yields an
 * empty list without touching the database.
 *
 * Cache convention: as in getRecord, the cache holds records in their
 * COMPRESSED form and decompression happens on the way out. Records that
 * are already cached are taken from the cache and not refreshed from the
 * bulk result.
 *
 * `type` is only used to build the message of the exception raised when the
 * bulk database lookup throws.
 */
static stList *getRecords(CactusDisk *cactusDisk, stList *objectNames, char *type) {
    if (stList_length(objectNames) == 0) {
        return stList_construct3(0, NULL);
    }
    stList *records = NULL;
    stTry {
        records = stKVDatabase_bulkGetRecords(cactusDisk->database, objectNames);
    } stCatch(except) {
        stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                "An unknown database error occurred when getting a bulk set of %s", type);
    } stTryEnd;
    assert(records != NULL);
    assert(stList_length(objectNames) == stList_length(records));
    // The bulk results are replaced below by plain buffers, so the list must
    // release its elements with free() from now on.
    stList_setDestructor(records, free);
    for (int64_t i = 0; i < stList_length(objectNames); i++) {
        Name objectName = *((int64_t *) stList_get(objectNames, i));
        int64_t recordSize = 0;
        void *record;
        stKVDatabaseBulkResult *result = stList_get(records, i);
        assert(result != NULL);
        if (!stCache_containsRecord(cactusDisk->cache, objectName, 0, INT64_MAX)) {
            // Buffer belongs to `result`; it is released by the destruct call below.
            void *compressed = stKVDatabaseBulkResult_getRecord(result, &recordSize);
            assert(recordSize >= 0);
            assert(compressed != NULL);
            // Cache the compressed bytes, then decompress for the caller.
            stCache_setRecord(cactusDisk->cache, objectName, 0, recordSize, compressed);
            record = decompress(compressed, &recordSize);
        } else {
            // Cached bytes are compressed: decompress them and release the
            // copy handed out by the cache.
            void *compressed = stCache_getRecord(cactusDisk->cache, objectName, 0, INT64_MAX, &recordSize);
            assert(recordSize >= 0);
            assert(compressed != NULL);
            record = decompress(compressed, &recordSize);
            free(compressed);
        }
        stKVDatabaseBulkResult_destruct(result);
        stList_set(records, i, record);
    }
    return records;
}
/*
 * cactusDisk_write: collects the pending changes of `cactusDisk` as bulk
 * requests on cactusDisk->updateRequests and submits them to the database.
 *
 * Steps visible here:
 *  - every flower in cactusDisk->flowers is passed to
 *    cactusDisk_addUpdateRequest;
 *  - for every name in cactusDisk->flowerNamesMarkedForDeletion for which
 *    containsRecord is true, a zero-size update request is queued and the
 *    name is appended to the local removeRequests list;
 *  - every meta-sequence is serialised and compressed, then queued as an
 *    insert request if containsRecord is false for its name, otherwise as
 *    an update request;
 *  - if containsRecord is false for CACTUS_DISK_PARAMETER_KEY, the cactus
 *    disk parameters are serialised, compressed and queued as an insert;
 *  - if any requests were queued they are sent with
 *    stKVDatabase_bulkSetRecords; an exception from that call is re-thrown
 *    wrapped in an ST_KV_DATABASE_EXCEPTION_ID exception.
 *
 * NOTE(review): the text of this function ends before its closing brace and
 * removeRequests is neither consumed nor destructed in the part shown --
 * the remainder of the definition appears to be missing; confirm against
 * the complete source.
 */
void cactusDisk_write(CactusDisk *cactusDisk) { Flower *flower; int64_t recordSize; stList *removeRequests = stList_construct3(0, (void (*)(void *)) stIntTuple_destruct); st_logDebug("Starting to write the cactus to disk\n"); stSortedSetIterator *it = stSortedSet_getIterator(cactusDisk->flowers); //Sort flowers to update. while ((flower = stSortedSet_getNext(it)) != NULL) { cactusDisk_addUpdateRequest(cactusDisk, flower); } stSortedSet_destructIterator(it); st_logDebug("Got the flowers to update\n"); //Remove nets that are marked for deletion.. it = stSortedSet_getIterator(cactusDisk->flowerNamesMarkedForDeletion); char *nameString; while ((nameString = stSortedSet_getNext(it)) != NULL) { Name name = cactusMisc_stringToName(nameString); if (containsRecord(cactusDisk, name)) { stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructUpdateRequest(name, &name, 0)); //We set it to null in the first atomic operation. stList_append(removeRequests, stIntTuple_construct1(name)); } } stSortedSet_destructIterator(it); st_logDebug("Avoided updating nets marked for deletion\n"); // Insert and/or update meta-sequences. 
it = stSortedSet_getIterator(cactusDisk->metaSequences); MetaSequence *metaSequence; while ((metaSequence = stSortedSet_getNext(it)) != NULL) { void *vA = binaryRepresentation_makeBinaryRepresentation(metaSequence, (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) metaSequence_writeBinaryRepresentation, &recordSize); //Compression vA = compress(vA, &recordSize); if (!containsRecord(cactusDisk, metaSequence_getName(metaSequence))) { stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructInsertRequest(metaSequence_getName(metaSequence), vA, recordSize)); } else { stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructUpdateRequest(metaSequence_getName(metaSequence), vA, recordSize)); } free(vA); } stSortedSet_destructIterator(it); st_logDebug("Got the sequences we are going to add to the database.\n"); if (!containsRecord(cactusDisk, CACTUS_DISK_PARAMETER_KEY)) { //We only write the parameters once. //Finally the database info. 
void *cactusDiskParameters = binaryRepresentation_makeBinaryRepresentation(cactusDisk, (void (*)(void *, void (*)(const void * ptr, size_t size, size_t count))) cactusDisk_writeBinaryRepresentation, &recordSize); //Compression cactusDiskParameters = compress(cactusDiskParameters, &recordSize); stList_append(cactusDisk->updateRequests, stKVDatabaseBulkRequest_constructInsertRequest(CACTUS_DISK_PARAMETER_KEY, cactusDiskParameters, recordSize)); free(cactusDiskParameters); } st_logDebug("Checked if need to write the initial parameters\n"); if (stList_length(cactusDisk->updateRequests) > 0) { st_logDebug("Going to write %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests)); stTry { st_logDebug("Writing %" PRIi64 " updates\n", stList_length(cactusDisk->updateRequests)); assert(stList_length(cactusDisk->updateRequests) > 0); stKVDatabase_bulkSetRecords(cactusDisk->database, cactusDisk->updateRequests); } stCatch(except) { stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID, "Failed when trying to set records in updating the cactus disk"); }stTryEnd ; }
/*
 * Adds a string to the cactus disk and returns the Name it was stored under.
 *
 * When cactusDisk->storeSequencesInAFile is false the string is cut into
 * pieces of at most CACTUS_DISK_SEQUENCE_CHUNK_SIZE characters; the pieces
 * are inserted into the database under consecutive names starting at the
 * returned one, each with size "piece length + 1".
 *
 * Otherwise the string is appended to the sequences file, preceded by a
 * '>' character, and the returned Name is the value of ftell() taken before
 * the write, plus one. In builds without NDEBUG the file is then synced,
 * closed and reopened for reading, and the string is read back and compared
 * character by character.
 */
Name cactusDisk_addString(CactusDisk *cactusDisk, const char *string) {
    if (!cactusDisk->storeSequencesInAFile) {
        /* Database-backed storage. */
        int64_t stringSize = strlen(string);
        int64_t chunkCount = ceil((double) stringSize / CACTUS_DISK_SEQUENCE_CHUNK_SIZE);
        Name firstName = cactusDisk_getUniqueIDInterval(cactusDisk, chunkCount);
        stList *requests = stList_construct3(0, (void (*)(void *)) stKVDatabaseBulkRequest_destruct);
        for (int64_t chunk = 0; chunk * CACTUS_DISK_SEQUENCE_CHUNK_SIZE < stringSize; chunk++) {
            // Every piece is a full chunk except possibly the last one.
            int64_t pieceLength;
            if ((chunk + 1) * CACTUS_DISK_SEQUENCE_CHUNK_SIZE < stringSize) {
                pieceLength = CACTUS_DISK_SEQUENCE_CHUNK_SIZE;
            } else {
                pieceLength = stringSize - chunk * CACTUS_DISK_SEQUENCE_CHUNK_SIZE;
            }
            char *piece = stString_getSubString(string, chunk * CACTUS_DISK_SEQUENCE_CHUNK_SIZE, pieceLength);
            stList_append(requests,
                    stKVDatabaseBulkRequest_constructInsertRequest(firstName + chunk, piece, pieceLength + 1));
            free(piece);
        }
        stTry {
            stKVDatabase_bulkSetRecords(cactusDisk->database, requests);
        } stCatch(except) {
            stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                    "An unknown database error occurred when we tried to add a string to the cactus disk");
        } stTryEnd;
        stList_destruct(requests);
        return firstName;
    }

    /* File-backed storage. */
    if (cactusDisk->sequencesWriteFileHandle != NULL) {
        // The read file handle should not be open at the same time.
        assert(cactusDisk->sequencesReadFileHandle == NULL);
    } else {
        // We do not allow the read file handle to be open at the same time.
        if (cactusDisk->sequencesReadFileHandle != NULL) {
            fclose(cactusDisk->sequencesReadFileHandle);
            cactusDisk->sequencesReadFileHandle = NULL;
        }
        cactusDisk->sequencesWriteFileHandle = fopen(cactusDisk->absSequencesFileName, "a");
        assert(cactusDisk->sequencesWriteFileHandle != NULL);
    }

    Name name = ftell(cactusDisk->sequencesWriteFileHandle) + 1;

    // Very long strings are written in pieces of roughly one gigabyte to
    // avoid a possible overflow issue with fprintf.
    int64_t chunkSize = 1000000000;
    int64_t length = strlen(string);
    if (length <= chunkSize) {
        int64_t written = fprintf(cactusDisk->sequencesWriteFileHandle, ">%s", string);
        (void) written;
        assert(written == length + 1);
    } else {
        fprintf(cactusDisk->sequencesWriteFileHandle, ">");
        int64_t offset = 0;
        while (offset < length) {
            int64_t span = offset + chunkSize <= length ? chunkSize : length - offset;
            char *piece = st_malloc(sizeof(char) * (span + 1));
            memcpy(piece, string + offset, sizeof(char) * span);
            piece[span] = '\0';
            int64_t written = fprintf(cactusDisk->sequencesWriteFileHandle, "%s", piece);
            (void) written;
            assert(written == span);
            free(piece);
            offset += span;
        }
    }
#ifndef NDEBUG
    // Extra fsync may not be necessary.
    fsync(fileno(cactusDisk->sequencesWriteFileHandle));
    fclose(cactusDisk->sequencesWriteFileHandle);
    cactusDisk->sequencesWriteFileHandle = NULL;
    cactusDisk->sequencesReadFileHandle = fopen(cactusDisk->absSequencesFileName, "r");
    char *readBack = getStringFromDisk(cactusDisk->sequencesReadFileHandle, name, 0, length);
    for (int64_t pos = 0; pos < length; pos++) {
        assert(string[pos] == readBack[pos]);
    }
    free(readBack);
#endif
    return name;
}
/*
 * Caches the given set of substrings in cactusDisk->stringCache.
 *
 * File-backed disks: each substring is read from the sequences file with
 * getStringFromDisk and stored in the cache under (name, start, length).
 * Any open write handle is synced and closed first, because the read and
 * write handles are never open together.
 *
 * Database-backed disks: for every substring the names of all chunks it
 * overlaps are requested in one bulk query. The chunk strings belonging to
 * a substring are then joined and cached under the substring's name, at the
 * start offset rounded down to a multiple of
 * CACTUS_DISK_SEQUENCE_CHUNK_SIZE, with the joined string's length.
 */
static void cacheSubstringsFromDB(CactusDisk *cactusDisk, stList *substrings) {
    if (cactusDisk->storeSequencesInAFile) {
        if (cactusDisk->sequencesReadFileHandle != NULL) {
            assert(cactusDisk->sequencesWriteFileHandle == NULL);
        } else {
            if (cactusDisk->sequencesWriteFileHandle != NULL) {
                fsync(fileno(cactusDisk->sequencesWriteFileHandle));
                fclose(cactusDisk->sequencesWriteFileHandle);
                cactusDisk->sequencesWriteFileHandle = NULL;
            }
            cactusDisk->sequencesReadFileHandle = fopen(cactusDisk->absSequencesFileName, "r");
            assert(cactusDisk->sequencesReadFileHandle != NULL);
        }
        for (int64_t idx = 0; idx < stList_length(substrings); idx++) {
            Substring *wanted = stList_get(substrings, idx);
            char *fromDisk = getStringFromDisk(cactusDisk->sequencesReadFileHandle, wanted->name,
                    wanted->start, wanted->length);
            stCache_setRecord(cactusDisk->stringCache, wanted->name, wanted->start, wanted->length, fromDisk);
#ifndef NDEBUG
            // Read the entry straight back and check it matches what was stored.
            int64_t bytesRead;
            char *fromCache = stCache_getRecord(cactusDisk->stringCache, wanted->name, wanted->start,
                    wanted->length, &bytesRead);
            assert(bytesRead == wanted->length);
            for (int64_t pos = 0; pos < wanted->length; pos++) {
                assert(fromCache[pos] == fromDisk[pos]);
            }
            free(fromCache);
#endif
            free(fromDisk);
        }
        return;
    }

    /* Database-backed: first gather the name of every chunk that is needed. */
    stList *getRequests = stList_construct3(0, free);
    for (int64_t idx = 0; idx < stList_length(substrings); idx++) {
        Substring *wanted = stList_get(substrings, idx);
        int64_t chunkCount = (wanted->length + wanted->start - 1) / CACTUS_DISK_SEQUENCE_CHUNK_SIZE
                - wanted->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1;
        Name firstChunkName = wanted->name + wanted->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE;
        for (int64_t chunk = 0; chunk < chunkCount; chunk++) {
            int64_t *chunkName = st_malloc(sizeof(int64_t));
            *chunkName = firstChunkName + chunk;
            stList_append(getRequests, chunkName);
        }
    }
    if (stList_length(getRequests) == 0) {
        stList_destruct(getRequests);
        return;
    }

    stList *records = NULL;
    stTry {
        records = stKVDatabase_bulkGetRecords(cactusDisk->database, getRequests);
    } stCatch(except) {
        stThrowNewCause(except, ST_KV_DATABASE_EXCEPTION_ID,
                "An unknown database error occurred when getting a sequence string");
    } stTryEnd;
    assert(records != NULL);
    assert(stList_length(records) == stList_length(getRequests));
    stList_destruct(getRequests);

    /* Results come back in request order, so walk them alongside the substrings. */
    stListIterator *recordsIt = stList_getIterator(records);
    for (int64_t idx = 0; idx < stList_length(substrings); idx++) {
        Substring *wanted = stList_get(substrings, idx);
        int64_t chunkCount = (wanted->length + wanted->start - 1) / CACTUS_DISK_SEQUENCE_CHUNK_SIZE
                - wanted->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1;
        stList *pieces = stList_construct();
        for (int64_t remaining = chunkCount; remaining > 0; remaining--) {
            int64_t recordSize;
            stKVDatabaseBulkResult *result = stList_getNext(recordsIt);
            assert(result != NULL);
            char *piece = stKVDatabaseBulkResult_getRecord(result, &recordSize);
            assert(piece != NULL);
            assert(strlen(piece) == recordSize - 1);
            stList_append(pieces, piece);
            assert(recordSize <= CACTUS_DISK_SEQUENCE_CHUNK_SIZE + 1);
        }
        assert(stList_length(pieces) > 0);
        char *joined = stString_join2("", pieces);
        stCache_setRecord(cactusDisk->stringCache, wanted->name,
                (wanted->start / CACTUS_DISK_SEQUENCE_CHUNK_SIZE) * CACTUS_DISK_SEQUENCE_CHUNK_SIZE,
                strlen(joined), joined);
        free(joined);
        stList_destruct(pieces);
    }
    assert(stList_getNext(recordsIt) == NULL);
    stList_destructIterator(recordsIt);
    stList_destruct(records);
}