/* Doubles the hash table of `db` and re-inserts every bucket into the new,
 * larger table.
 *
 * Buckets chained off a slot ("orphans") are detached and pushed onto a
 * temporary stack, then rehashed one by one so each lands in its own slot
 * of the grown table.
 *
 * Returns 0 on success, -1 on allocation failure (the old table is left
 * untouched in that case). */
int _ol_grow_and_rehash_db(ol_database *db) {
    int i;
    ol_bucket *bucket;
    ol_bucket **tmp_hashes = NULL;

    size_t to_alloc = db->cur_ht_size * 2;
    debug("Growing DB to %zu bytes.", to_alloc);
    tmp_hashes = calloc(1, to_alloc);
    check_mem(tmp_hashes);

    /* Stack of chained buckets awaiting individual rehashing. The first
     * node acts as a sentinel (data == NULL), so `orphans` itself is never
     * NULL while entries remain. */
    struct ol_stack *orphans = NULL;
    orphans = malloc(sizeof(struct ol_stack));
    check_mem(orphans);
    orphans->next = NULL;
    orphans->data = NULL;
    int orphans_found = 0;

    int iterations = ol_ht_bucket_max(db->cur_ht_size);
    for (i = 0; i < iterations; i++) {
        bucket = db->hashes[i];
        if (bucket != NULL) {
            if (bucket->next != NULL) {
                /* Detach every chained bucket and stash it for later. */
                ol_bucket *tmp_bucket = bucket;
                do {
                    spush(&orphans, tmp_bucket->next);
                    ol_bucket *next = tmp_bucket->next;
                    tmp_bucket->next = NULL;
                    tmp_bucket = next;
                    orphans_found++;
                } while (tmp_bucket->next != NULL);
            }
            /* Rehash the bucket itself. */
            _ol_rehash_insert_bucket(tmp_hashes, to_alloc, bucket);
        }
    }

    /* Take care of our orphans */
    ol_log_msg(LOG_INFO, "Have %i orphans to take care of.", orphans_found);
    /* BUGFIX: the old do/while popped unconditionally, so a table with zero
     * chained buckets popped the sentinel node and then dereferenced a NULL
     * stack pointer in the loop condition. Only pop while real entries
     * remain above the sentinel. */
    while (orphans->next != NULL) {
        ol_bucket *rebucket = spop(&orphans);
        _ol_rehash_insert_bucket(tmp_hashes, to_alloc, rebucket);
        orphans_found--;
    }
    ol_log_msg(LOG_INFO, "We now have %i orphans not accounted for.", orphans_found);
    free(orphans); /* Frees the sentinel node. */

    /* Swap the new table in. */
    free(db->hashes);
    db->hashes = tmp_hashes;
    db->cur_ht_size = to_alloc;
    debug("Current hash table size is now: %zu bytes.", to_alloc);
    return 0;

error:
    if (tmp_hashes != NULL)
        free(tmp_hashes);
    return -1;
}
/* Test-suite entry point: installs signal handlers, runs every registered
 * test, reports the pass count, and exits with the second result slot
 * (presumably the failure count — verify against run_tests). */
int main(int argc, char *argv[]) {
    signal(SIGTERM, clean_up);
    signal(SIGINT, clean_up);
    signal(SIGCHLD, SIG_IGN); /* Don't leave zombie children around. */

    ol_log_msg(LOG_INFO, "Running tests.");

    /* Slot [0] is the number of tests passed; slot [1] is returned as the
     * process exit status. */
    int test_results[2];
    run_tests(test_results);

    ol_log_msg(LOG_INFO, "Tests passed: %i.\n", test_results[0]);
    printf("No.\n");
    return test_results[1];
}
/* Walks to the end of a bucket's collision chain and returns the last
 * bucket in it. Warns once if the chain is absurdly deep, since that means
 * the hash table badly needs growing.
 *
 * Assumes `bucket` is non-NULL. */
ol_bucket *_ol_get_last_bucket_in_slot(ol_bucket *bucket) {
    ol_bucket *tmp_bucket = bucket;
    int depth = 0;
    while (tmp_bucket->next != NULL) {
        tmp_bucket = tmp_bucket->next;
        depth++;
        /* BUGFIX: warn exactly once when the threshold is crossed instead
         * of on every additional node, which spammed the log with
         * O(chain length) identical warnings. */
        if (depth == 1001)
            ol_log_msg(LOG_WARN, "Depth of bucket stack is crazy, help! It's at %i", depth);
    }
    return tmp_bucket;
}
/* Appends one command record to the database's append-only log (AOL).
 *
 * Supported commands:
 *   "JAR"   — full record: key, content type, original size, stored size,
 *             and the offset of the data in the values file.
 *   "SCOOP" — deletion: just the command and the key.
 *   "SPOIL" — expiration: command, key, and a serialized timestamp.
 *
 * Records are colon-delimited `:len:payload` pairs terminated by '\n'.
 * Returns the result of ol_aol_sync() on success, -1 on a write error or
 * an unknown command. */
int ol_aol_write_cmd(ol_database *db, const char *cmd, ol_bucket *bct) {
    int ret;
    if (strncmp(cmd, "JAR", 3) == 0) {
        /* I'LL RIGOR YER MORTIS */
        debug("Writing: \"%.*s\"", (int)bct->klen, bct->key);
        /* Each numeric field is preceded by its own decimal width (via
         * intlen) so the reader knows how many digits to consume. */
        char aol_str[] = ":%zu:%s"   /* cmd length, cmd */
            ":%zu:%.*s"              /* klen size, key */
            ":%zu:%.*s"              /* ctype size, content_type */
            ":%d:%d"                 /* sizeof(original_size), original_size */
            ":%d:%d"                 /* sizeof(size_t), data_size */
            ":%d:%d";                /* sizeof(size_t), offset into file */
        ret = fprintf(db->aolfd, aol_str,
                strlen(cmd), cmd,
                bct->klen, (int)bct->klen, bct->key,
                bct->ctype_size, (int)bct->ctype_size, bct->content_type,
                intlen(bct->original_size), bct->original_size,
                intlen(bct->data_size), bct->data_size,
                intlen(bct->data_offset), bct->data_offset);
        check(ret > -1, "Error writing to file.");
        /* Terminate the record. */
        ret = fprintf(db->aolfd, "\n");
    } else if (strncmp(cmd, "SCOOP", 5) == 0) {
        ret = fprintf(db->aolfd, ":%zu:%s:%zu:%s\n",
                strlen(cmd), cmd,
                bct->klen, bct->key);
        check(ret > -1, "Error writing to file.");
    } else if (strncmp(cmd, "SPOIL", 5) == 0) {
        /* 20 chars of serialized time + NUL terminator. */
        char exptime[21] = {'\0'};
        _serialize_time(bct->expiration, exptime);
        /* `%*s` pads the timestamp to a fixed width of 20 characters. */
        ret = fprintf(db->aolfd, ":%zu:%s:%zu:%s:%zu:%*s\n",
                strlen(cmd), cmd,
                bct->klen, bct->key,
                strlen(exptime), 20, exptime);
        check(ret > -1, "Error writing to file.");
    } else {
        ol_log_msg(LOG_ERR, "No such command '%s'", cmd);
        return -1;
    }
    check(ret > -1, "Error writing to file.");
    /* Push the record to disk before reporting success. */
    return ol_aol_sync(db);
error:
    return -1;
}
void graceful_shutdown(int sig) { if (mainKeeper != NULL) { ol_log_msg(LOG_INFO, "Caught SIGINT. Shutting down gracefully."); zmq::context_t context(2); zmq::socket_t socket(context, ZMQ_REQ); socket.connect(SCHEDULER_URI); std::map<std::string, std::string> req; req["type"] = "shutdown"; msgpack::sbuffer to_send; msgpack::pack(&to_send, req); zmq::message_t response(to_send.size()); memcpy(response.data(), to_send.data(), to_send.size()); socket.send(response); // We'll thread.join in the destructor: delete mainKeeper; } exit(0); }
int main(int argc, char *argv[]) { srand(time(NULL)); signal(SIGINT, graceful_shutdown); // Argument defaults: bool verbose = false; int num_workers = 1; // Look for any arguments: int i = 1, files_start_at = -1; for (i = 1; i < argc; i++) { std::string arg = argv[i]; if (arg[0] == '-') { std::string cmd = arg.substr(1); if (cmd == "v" || cmd == "-verbose") { verbose = true; } else if (cmd == "j" || cmd == "jobs") { // ALL THIS F*****G TARPIT! i++; std::string jobs_count = argv[i]; num_workers = std::stoi(jobs_count); if (verbose) ol_log_msg(LOG_INFO, "Running with %i workers.", num_workers); } } else { // This isn't something that starts with a -. Assume it is a file // and move on. files_start_at = i; break; } } if (argc < 2) { ol_log_msg(LOG_ERR, "You need to specify some directories/files to work on."); return 1; } if (files_start_at == argc || files_start_at == -1) { ol_log_msg(LOG_ERR, "You need files to work on in addition to options."); return 1; } ol_log_msg(LOG_INFO, "Starting kyotopantry."); // Process files: int files_added = 0; mainKeeper = new kyotopantry::gatekeeper(verbose, num_workers); for (i = files_start_at; i < argc; i++) { std::string file_to_add = argv[i]; if (!file_exists(file_to_add.c_str())) { ol_log_msg(LOG_WARN, "File %s doesn't exist or I can't open it or something.", argv[i]); continue; } if (verbose) ol_log_msg(LOG_INFO, "Adding %s to queue...", file_to_add.c_str()); if (!mainKeeper->queue_file_job(file_to_add)) { ol_log_msg(LOG_ERR, "Could not add file to queue."); return 1; } files_added++; } if (files_added == 0) { ol_log_msg(LOG_ERR, "Could not add any files. Bummer."); return 1; } if (verbose) ol_log_msg(LOG_INFO, "Processing %i files...", files_added); // Actually do the processing: main_loop(verbose, num_workers); delete mainKeeper; return 0; }
/* Transactional "jar" (insert/update): stores `value` under `key` inside
 * the transaction's database.
 *
 * If the key already exists the bucket is reallocated in place; otherwise
 * a new bucket is created, the value is (optionally LZ4-)compressed into
 * the values file, and the bucket is hashed into the table. During AOL
 * replay (OL_S_STARTUP) only sizes/offsets are computed — the values file
 * already holds the data.
 *
 * Returns OL_SUCCESS or OL_FAILURE. Marks the transaction dirty on any
 * successful write path. */
int olt_jar(ol_transaction *tx, const char *key, size_t klen, const unsigned char *value, size_t vsize) {
    int ret;
    char _key[KEY_SIZE] = {'\0'};
    size_t _klen = 0;

    ol_database *db = tx->transaction_db;

    ol_bucket *bucket = ol_get_bucket(db, key, klen, &_key, &_klen);
    check(_klen > 0, "Key length of zero not allowed.");

    /* We only want to hit this codepath within the same database, otherwise
     * weird stuff happens. Like fires and stuff. */
    if (bucket != NULL) {
        /* Flag the transaction as dirty. */
        tx->dirty = 1;
        return _ol_reallocate_bucket(db, bucket, value, vsize);
    }

    /* Looks like we don't have an old hash */
    ol_bucket *new_bucket = calloc(1, sizeof(ol_bucket));
    if (new_bucket == NULL)
        return OL_FAILURE;

    /* copy _key into new bucket */
    new_bucket->key = malloc(_klen + 1);
    if (new_bucket->key == NULL) {
        /* BUGFIX: the old check_mem jumped to `error:` without freeing the
         * bucket, leaking it on key-allocation failure. */
        free(new_bucket);
        return OL_FAILURE;
    }
    new_bucket->key[_klen] = '\0';
    /* Note: strncpy always returns its destination, so there is nothing to
     * error-check here (the old `!=` comparison was dead code). */
    strncpy(new_bucket->key, _key, _klen);

    new_bucket->klen = _klen;
    new_bucket->original_size = vsize;

    /* Compute the new position of the data in the values file: */
    const size_t new_offset = db->val_size;

    if (db->state != OL_S_STARTUP) {
        unsigned char *new_data_ptr = NULL;

        if (db->is_enabled(OL_F_LZ4, &db->feature_set)) {
            /* Compress using LZ4 if enabled */
            int maxoutsize = LZ4_compressBound(vsize);
            _ol_ensure_values_file_size(db, maxoutsize);
            new_data_ptr = db->values + db->val_size;
            memset(new_data_ptr, '\0', maxoutsize);

            /* All these f*****g casts */
            size_t cmsize = (size_t)LZ4_compress((char*)value, (char*)new_data_ptr, (int)vsize);
            if (cmsize == 0) {
                /* BUGFIX: free the key too, not just the bucket. */
                free(new_bucket->key);
                free(new_bucket);
                return OL_FAILURE;
            }
            new_bucket->data_size = cmsize;
        } else {
            new_bucket->data_size = vsize;
            _ol_ensure_values_file_size(db, new_bucket->data_size);
            new_data_ptr = db->values + db->val_size;
            memset(new_data_ptr, '\0', new_bucket->data_size);
            if (memcpy(new_data_ptr, value, vsize) != new_data_ptr) {
                /* BUGFIX: free the key too, not just the bucket. */
                free(new_bucket->key);
                free(new_bucket);
                return OL_FAILURE;
            }
        }
    } else {
        /* We still need to set the data size, but not the actual data. */
        if (db->is_enabled(OL_F_LZ4, &db->feature_set)) {
            /* Since LZ4_compressBound only provides the worst case scenario
             * and not what the data actually compressed to (we're replaying
             * the AOL file, remember?) we have to compress it again and grab
             * the amount of bytes processed.
             * TODO: This is dumb. Make a function that just sets the bucket size.
             * This new mythical function should also handle setting the data_offset
             * of the bucket. */
            int maxoutsize = LZ4_compressBound(vsize);
            char tmp_data[maxoutsize];
            /* Don't need to memset tmp_data because I don't care about it. */
            size_t cmsize = (size_t)LZ4_compress((char *)value, (char *)tmp_data, (int)vsize);
            new_bucket->data_size = cmsize;
        } else {
            new_bucket->data_size = vsize;
        }
    }

    /* Set the offset of the bucket before we increment it offset globally. */
    new_bucket->data_offset = new_offset;

    /* Remember to increment the tracked data size of the DB. */
    db->val_size += new_bucket->data_size;

    unsigned int bucket_max = ol_ht_bucket_max(db->cur_ht_size);
    /* TODO: rehash this shit at 80% */
    if (db->rcrd_cnt > 0 && db->rcrd_cnt == bucket_max) {
        debug("Record count is now %i; growing hash table.", db->rcrd_cnt);
        ret = _ol_grow_and_rehash_db(db);
        if (ret > 0) {
            ol_log_msg(LOG_ERR, "Problem rehashing DB. Error code: %i", ret);
            /* BUGFIX: free the key too, not just the bucket. */
            free(new_bucket->key);
            free(new_bucket);
            return OL_FAILURE;
        }
    }

    uint32_t hash;
    MurmurHash3_x86_32(_key, _klen, DEVILS_SEED, &hash);
    ret = _ol_set_bucket(db, new_bucket, hash);

    if(ret > 0)
        ol_log_msg(LOG_ERR, "Problem inserting item: Error code: %i", ret);

    if(db->is_enabled(OL_F_APPENDONLY, &db->feature_set) &&
            db->state != OL_S_STARTUP) {
        ol_aol_write_cmd(db, "JAR", new_bucket);
    }

    /* Flag the transaction as dirty. */
    tx->dirty = 1;

    return OL_SUCCESS;

error:
    return OL_FAILURE;
}
/* Replays the append-only log (AOL) to rebuild the in-memory database
 * state after a restart.
 *
 * Reads `:len:payload` records from db->aol_file and re-applies each
 * command ("JAR" insert, "SCOOP" delete, "SPOIL" expire) against `db`.
 * For JAR records the data itself already lives in the values file; the
 * log only stores sizes and an offset, so the data is located, optionally
 * decompressed, and re-jarred.
 *
 * Returns 0 on success, -1 on a read/parse failure (logged as a likely
 * corrupt AOL). */
int ol_aol_restore(ol_database *db) {
    ol_string *command = NULL, *key = NULL, *value = NULL,
              *ct = NULL, *read_data_size = NULL, *read_org_size = NULL;

    FILE *fd = fopen(db->aol_file, "r");
    check(fd, "Error opening file");
    while (!feof(fd)) {
        command = _ol_read_data(fd);
        check(command, "Error reading");

        /* Kind of a hack to check for EOF. If the struct is blank, then we
         * read past EOF in _ol_read_data. feof is rarely useful I guess... */
        if (command->data == NULL) {
            free(command);
            break;
        }

        key = _ol_read_data(fd);
        check(key, "Error reading"); /* Everything needs a key */

        if (strncmp(command->data, "JAR", 3) == 0) {
            /* JAR layout: content type, original size, stored (possibly
             * compressed) size, then the offset into the values file. */
            ct = _ol_read_data(fd);
            check(ct, "Error reading");

            read_org_size = _ol_read_data(fd);
            check(read_org_size, "Error reading");

            read_data_size = _ol_read_data(fd);
            check(read_data_size, "Error reading");

            value = _ol_read_data(fd);
            check(value, "Error reading");

            size_t original_size = (size_t)strtol(read_org_size->data, NULL, 10);
            size_t compressed_size = (size_t)strtol(read_data_size->data, NULL, 10);
            /* Despite the name, `value` here holds the file offset, not the
             * data itself. */
            size_t data_offset = (size_t)strtol(value->data, NULL, 10);

            /* Pointer in the values file to where the data for this command
             * should be. */
            unsigned char *data_ptr = db->values + data_offset;

            /* Short circuit check to see if the memory in the location is all
             * null. All-NUL means the record was deleted/compacted away. */
            int memory_is_not_null = 0;
            int i = 0;
            for (; i < compressed_size; i++) {
                if ('\0' != data_ptr[i]) {
                    debug("Data is not null on %zu.", data_offset + i);
                    memory_is_not_null = 1;
                    break;
                }
            }

            if (memory_is_not_null) {
                /* Turns out that in rare cases LZ4 will compress to exactly
                 * the same size as it's starting string. This means we can't
                 * just check to see if original_size != compressed_size, so
                 * instead we first attempt to decompress and check how many
                 * chars were processed. */
                char tmp_data[original_size];
                char *ret = memset(&tmp_data, 0, original_size);
                check(ret == tmp_data, "Could not initialize tmp_data parameter.");
                int processed = LZ4_decompress_fast((const char*)data_ptr, (char *)tmp_data, original_size);
                if (processed == compressed_size)
                    /* Decompression consumed exactly the stored bytes, so the
                     * record really was compressed; jar the decompressed copy. */
                    ol_jar_ct(db, key->data, key->dlen, (unsigned char*)tmp_data, original_size, ct->data, ct->dlen);
                else {
                    if (original_size != compressed_size)
                        ol_log_msg(LOG_WARN, "Could not decompress data that is probably compressed. Data may have been deleted.");
                    /* Now that we've tried to decompress and failed, send off the raw data instead. */
                    ol_jar_ct(db, key->data, key->dlen, data_ptr, compressed_size, ct->data, ct->dlen);
                }
            }
#ifdef DEBUG
            /* This happens a lot and isn't bad, so I'm commenting it out. */
            else
                ol_log_msg(LOG_WARN, "No data in values file that corresponds with this key. Key has been deleted or updated.");
#endif
            /* Important: Set the new offset to compressed_size + data_offset.
             * We need to do this because compaction/squishing will leave holes
             * in the data that we need to account for during replay. */
            db->val_size = compressed_size + data_offset;

            ol_string_free(&read_org_size);
            ol_string_free(&read_data_size);
            ol_string_free(&ct);
            ol_string_free(&value);
        } else if (strncmp(command->data, "SCOOP", 5) == 0)
            ol_scoop(db, key->data, key->dlen);
        else if (strncmp(command->data, "SPOIL", 5) == 0) {
            /* SPOIL carries one extra field: the serialized expiration time. */
            ol_string *spoil = _ol_read_data(fd);
            check(spoil != NULL, "Could not read the rest of SPOIL command for AOL.");
            struct tm time = {0};
            _deserialize_time(&time, spoil->data);
            check(spoil, "Error reading");
            ol_spoil(db, key->data, key->dlen, &time);
            ol_string_free(&spoil);
        }

        /* Strip the newline char after each "record" */
        char c;
        check(fread(&c, 1, 1, fd) != 0, "Error reading");
        check(c == '\n', "Could not strip newline");

        ol_string_free(&command);
        ol_string_free(&key);
    }
    fclose(fd);
    return 0;

error:
    ol_log_msg(LOG_ERR, "Restore failed. Corrupt AOL?");
    /* Free all the stuff */
    ol_string_free(&command);
    ol_string_free(&key);
    ol_string_free(&value);
    ol_string_free(&ct);
    ol_string_free(&read_org_size);
    ol_string_free(&read_data_size);
    if (fd != NULL)
        fclose(fd);
    return -1;
}
/* Signal handler for the test runner: closes the database cleanly and
 * terminates the process with status 0. */
void clean_up(int signum) {
    /* Make sure the DB is flushed/closed before we go away. */
    ol_close(db);
    ol_log_msg(LOG_INFO, "Exiting cleanly.");
    exit(0);
}