/*
 * Renumber the data files in the manager's base directory so that the
 * existing ones occupy consecutive indices starting at 0.
 *
 * Every index in [0, MAX_BUCKET_COUNT) is probed.  When a data file is found
 * at an index higher than the next free slot, the data file (and its hint
 * file, if one exists) is renamed down to that slot and the HTree file of
 * the old index is unlinked.
 */
static void skip_empty_file(Bitcask* bc)
{
    char src[255], dst[255];
    const char *root = mgr_base(bc->mgr);
    int next = 0;   /* slot the next existing data file should end up in */

    for (int idx = 0; idx < MAX_BUCKET_COUNT; idx++) {
        if (!file_exists(gen_path(src, root, DATA_FILE, idx))) {
            continue;   /* hole in the numbering: nothing to move */
        }

        if (idx != next) {
            /* Close the gap: move data + hint down to the free slot. */
            mgr_rename(src, gen_path(dst, root, DATA_FILE, next));
            if (file_exists(gen_path(src, root, HINT_FILE, idx))) {
                mgr_rename(src, gen_path(dst, root, HINT_FILE, next));
            }
            /* The HTree file of the old index is removed rather than moved. */
            mgr_unlink(gen_path(src, root, HTREE_FILE, idx));
        }
        next++;
    }
}
/*
 * Scan the data files under the manager's base directory and load them into
 * bc->tree.
 *
 * Buckets are visited from 0 upwards until the first missing data file.  The
 * size of each data file is added to bc->bytes.  When bc->before is 0 the
 * hint file is used if present, otherwise the data file is scanned (passing
 * a hint path under the directory returned by mgr_alloc).  When bc->before is
 * non-zero the hint file is only used if it, or the data file, was last
 * modified before bc->before; otherwise the data file is scanned with
 * scanDataFileBefore.  On return bc->curr is the index of the first bucket
 * that has no data file.
 *
 * All path formatting is bounded by the destination buffer size, so an
 * over-long base directory results in a truncated path instead of a stack
 * buffer overflow.
 */
void bc_scan(Bitcask* bc)
{
    const char *path = mgr_base(bc->mgr);
    char dname[20], hname[20], datapath[255], hintpath[255];
    int i = 0;
    struct stat st;

    for (i = 0; i < MAX_BUCKET_COUNT; i++) {
        snprintf(dname, sizeof dname, DATA_FILE, i);
        snprintf(datapath, sizeof datapath, "%s/%s", path, dname);
        if (stat(datapath, &st) != 0) {
            break;  /* first missing data file ends the scan */
        }
        bc->bytes += st.st_size;

        snprintf(hname, sizeof hname, HINT_FILE, i);
        snprintf(hintpath, sizeof hintpath, "%s/%s", path, hname);

        if (bc->before == 0) {
            if (0 == stat(hintpath, &st)) {
                scanHintFile(bc->tree, i, hintpath, NULL);
            } else {
                /* No hint file yet: scan the data file, handing over a hint
                 * path located in the directory chosen by mgr_alloc. */
                snprintf(hintpath, sizeof hintpath, "%s/%s",
                         mgr_alloc(bc->mgr, hname), hname);
                scanDataFile(bc->tree, i, datapath, hintpath);
            }
        } else {
            /* The hint file is usable when it (or the data file) was last
             * modified before the cut-off time. */
            if (0 == stat(hintpath, &st)
                && (st.st_mtime < bc->before
                    || (0 == stat(datapath, &st) && st.st_mtime < bc->before))) {
                scanHintFile(bc->tree, i, hintpath, NULL);
            } else {
                scanDataFileBefore(bc->tree, i, datapath, bc->before);
            }
        }
    }
    bc->curr = i;
}
/*
 * Look up `key` and return its record, or NULL when it cannot be produced.
 *
 * The Item returned by ht_get() is freed on every path.  The low 8 bits of
 * item->pos select the bucket (data file index); the remaining bits are the
 * offset of the record.  A negative item->ver yields NULL without touching
 * the tree.
 *
 * If the record lives in the current bucket and its offset is at or beyond
 * bc->wbuf_start_pos, it is first decoded from the in-memory write buffer
 * under buffer_lock.  Otherwise (or if decoding yields nothing) the record is
 * read from the bucket's data file.
 *
 * When no record can be returned from the data file path (open failure, read
 * failure, or key mismatch) the key is removed from bc->tree.
 *
 * The caller owns the returned record.
 */
DataRecord* bc_get(Bitcask *bc, const char* key)
{
    Item *item = ht_get(bc->tree, key);
    if (NULL == item) return NULL;
    if (item->ver < 0) {
        free(item);
        return NULL;
    }

    int bucket = item->pos & 0xff;
    uint32_t pos = item->pos & 0xffffff00;
    if (bucket > bc->curr) {
        fprintf(stderr, "BUG: invalid bucket %d > %d\n", bucket, bc->curr);
        ht_remove(bc->tree, key);
        free(item);
        return NULL;
    }

    DataRecord *r = NULL;
    if (bucket == bc->curr) {
        pthread_mutex_lock(&bc->buffer_lock);
        /* Re-check under the lock: bc->curr may have changed since the
         * unlocked test above. */
        if (bucket == bc->curr && pos >= bc->wbuf_start_pos) {
            int p = pos - bc->wbuf_start_pos;
            r = decode_record(bc->write_buffer + p, bc->wbuf_curr_pos - p, true);
        }
        pthread_mutex_unlock(&bc->buffer_lock);
        if (r != NULL) {
            free(item);
            return r;
        }
    }

    char fname[20], data[255];
    const char *path = mgr_base(bc->mgr);
    /* Bounded formatting: an over-long base directory truncates the path
     * (and open() then fails) instead of overflowing the stack buffers. */
    snprintf(fname, sizeof fname, DATA_FILE, bucket);
    snprintf(data, sizeof data, "%s/%s", path, fname);

    int fd = open(data, O_RDONLY);
    if (-1 == fd) {
        goto GET_END;
    }

    r = fast_read_record(fd, pos, true);
    if (NULL == r) {
        /* pos is unsigned and may exceed INT_MAX, so print it with %u. */
        fprintf(stderr, "Bug: get %s failed in %s %d %u\n",
                key, path, bucket, (unsigned)pos);
    } else {
        // check key
        if (strcmp(key, r->key) != 0) {
            fprintf(stderr, "Bug: record %s is not expected %s\n", r->key, key);
            free_record(r);
            r = NULL;
        }
    }

GET_END:
    if (NULL == r) ht_remove(bc->tree, key);
    if (fd != -1) close(fd);
    free(item);
    return r;
}
/*
 * Return the size in bytes of the data file of `bucket` in the manager's
 * base directory, or 0 when stat() on that file fails.
 */
uint64_t data_file_size(Bitcask *bc, int bucket)
{
    char file[255];
    struct stat info;

    gen_path(file, mgr_base(bc->mgr), DATA_FILE, bucket);
    if (stat(file, &info) == 0) {
        return info.st_size;
    }
    return 0;
}
/*
 * Build the path of file number `i` (named by the printf-style `fmt`) into
 * `dst` and return it.
 *
 * The path under the manager's base directory is tried first.  If no file
 * exists there, the path is rewritten to point into the directory returned
 * by mgr_alloc() for that file name.
 *
 * `dst` must be large enough for the resulting path; callers in this file
 * pass 255-byte buffers.
 */
static inline char *new_path(char *dst, Mgr *mgr, const char *fmt, int i)
{
    char *result = gen_path(dst, mgr_base(mgr), fmt, i);

    if (file_exists(dst)) {
        return result;  /* already present in the base directory */
    }

    /* Not there yet: place it where the manager allocates new files. */
    char leaf[16];
    sprintf(leaf, fmt, i);
    sprintf(result, "%s/%s", mgr_alloc(mgr, leaf), leaf);
    return result;
}
/*
 * bc_close() shuts a Bitcask down and frees it.
 *
 * It is not thread safe: all other threads using `bc` must be stopped before
 * it is called.
 *
 * Steps, in order:
 *   - if an optimize pass is active, request it to stop and wait for it;
 *   - flush the write buffer while holding write_lock;
 *   - write the hint file for the current bucket if it holds data, otherwise
 *     destroy the current tree;
 *   - step bc->curr back by one if the current bucket is empty;
 *   - save an HTree snapshot when the distance to the last snapshot has
 *     reached SAVE_HTREE_LIMIT, removing the previous snapshot on success;
 *   - destroy the tree and the manager, free the write buffer and `bc`.
 */
void bc_close(Bitcask *bc)
{
    char snapshot_path[255], hint_path[255];

    if (bc->optimize_flag > 0) {
        /* Setting the flag to 2 makes bc_optimize leave its main loop; it
         * resets the flag to 0 when it is done, which ends this wait. */
        bc->optimize_flag = 2;
        while (bc->optimize_flag > 0) {
            sleep(1);
        }
    }

    pthread_mutex_lock(&bc->write_lock);

    bc_flush(bc, 0, 0);

    if (bc->curr_tree != NULL) {
        if (bc->curr_bytes > 0) {
            /* NOTE(review): curr_tree is not destroyed here afterwards, so
             * build_hint presumably takes ownership of it - confirm. */
            build_hint(bc->curr_tree,
                       new_path(hint_path, bc->mgr, HINT_FILE, bc->curr));
        } else {
            ht_destroy(bc->curr_tree);
        }
        bc->curr_tree = NULL;
    }

    /* An empty current bucket does not count as the newest bucket. */
    if (bc->curr_bytes == 0) {
        bc->curr--;
    }

    if (bc->curr - bc->last_snapshot >= SAVE_HTREE_LIMIT) {
        if (ht_save(bc->tree,
                    new_path(snapshot_path, bc->mgr, HTREE_FILE, bc->curr)) != 0) {
            fprintf(stderr, "save HTree to %s failed\n", snapshot_path);
        } else {
            /* New snapshot is on disk: the previous one is obsolete. */
            mgr_unlink(gen_path(snapshot_path, mgr_base(bc->mgr),
                                HTREE_FILE, bc->last_snapshot));
        }
    }

    ht_destroy(bc->tree);
    mgr_destroy(bc->mgr);
    free(bc->write_buffer);
    free(bc);
}
/*
 * Compact the data files of `bc`, merging records of older buckets into
 * lower-numbered buckets and renumbering the files that are left alone.
 *
 * `limit` selects which files are old enough to be rewritten:
 *   - a value larger than ten years' worth of seconds is taken as an
 *     absolute timestamp;
 *   - any other value is taken as a number of seconds before now.
 * Data files modified after that time are not rewritten; they are only
 * renamed down to the next free index, and the positions stored in bc->tree
 * are updated to the new index.
 *
 * bc->optimize_flag is set to 1 for the duration of the pass and reset to 0
 * at the end.  The main loop stops early as soon as the flag is no longer 1
 * (bc_close sets it to 2 to request that).
 *
 * All HTree snapshot files below bc->curr are removed up front and
 * bc->last_snapshot is reset to -1.
 *
 * After the loop, with write_lock and flush_lock held, the current bucket is
 * moved down to the first free index if the loop ran to completion and left
 * a gap, and bc->curr is updated accordingly.
 */
void bc_optimize(Bitcask *bc, int limit)
{
    /* total is an out-parameter of count_deleted_record; initialise it so a
     * callee that leaves it untouched cannot feed garbage into the estimate. */
    int i, total = 0, last = -1;
    bc->optimize_flag = 1;
    const char *base = mgr_base(bc->mgr);

    // remove htree
    for (i = 0; i < bc->curr; i++) {
        mgr_unlink(gen_path(NULL, base, HTREE_FILE, i));
    }
    bc->last_snapshot = -1;

    time_t limit_time = 0;
    if (limit > 3600 * 24 * 365 * 10) { // more than 10 years
        limit_time = limit; // absolute time
    } else {
        limit_time = time(NULL) - limit; // relative time
    }

    struct stat st;
    bool skipped = false;
    for (i = 0; i < bc->curr && bc->optimize_flag == 1; i++) {
        char datapath[255], hintpath[255];
        gen_path(datapath, base, DATA_FILE, i);
        gen_path(hintpath, base, HINT_FILE, i);
        if (stat(datapath, &st) != 0) {
            continue; // skip empty file
        }

        // skip recent modified file
        if (st.st_mtime > limit_time) {
            skipped = true;
            last++;
            if (last != i) { // rotate data file
                char npath[255];
                gen_path(npath, base, DATA_FILE, last);
                /* Make the data reachable under the new index before the
                 * tree is switched over to it. */
                if (symlink(datapath, npath) != 0) {
                    fprintf(stderr, "symlink failed: %s -> %s\n", datapath, npath);
                    last = i;
                    continue;
                }
                // update HTree to use new index
                if (stat(hintpath, &st) != 0) {
                    fprintf(stderr, "no hint file: %s, skip it\n", hintpath);
                    /* The rotation is abandoned, so remove the symlink just
                     * created; otherwise a phantom data file would remain
                     * at the unused index. */
                    unlink(npath);
                    last = i;
                    continue;
                }
                HTree *tree = ht_new(bc->depth, bc->pos);
                scanHintFile(tree, i, hintpath, NULL);
                struct update_args args;
                args.tree = bc->tree;
                args.index = last;
                ht_visit(tree, update_item_pos, &args);
                ht_destroy(tree);

                /* Replace the temporary symlink with the real files. */
                unlink(npath);
                mgr_rename(datapath, npath);
                mgr_rename(hintpath, gen_path(npath, base, HINT_FILE, last));
            }
            continue;
        }

        int deleted = count_deleted_record(bc->tree, i, hintpath, &total);
        uint64_t curr_size = data_file_size(bc, i) * (total - deleted/2) / (total+1); // guess
        uint64_t last_size = last >= 0 ? data_file_size(bc, last) : -1; // last data file size
        uint32_t recoverd = 0;
        /* Start a new target bucket when there is none yet, or when the
         * estimated result would not fit into the previous one. */
        if (last == -1 || last_size + curr_size > MAX_BUCKET_SIZE) {
            last++;
        }
        /* Try to merge bucket i into successive lower buckets until one of
         * the attempts reports a non-zero result. */
        while (last < i) {
            char ldpath[255], lhpath[255];
            new_path(ldpath, bc->mgr, DATA_FILE, last);
            new_path(lhpath, bc->mgr, HINT_FILE, last);
            recoverd = optimizeDataFile(bc->tree, i, datapath, hintpath,
                                        skipped, MAX_BUCKET_SIZE, last, ldpath, lhpath);
            if (recoverd == 0) {
                last++;
            } else {
                break;
            }
        }
        if (recoverd == 0) { // last == i
            recoverd = optimizeDataFile(bc->tree, i, datapath, hintpath,
                                        skipped, MAX_BUCKET_SIZE, last, NULL, NULL);
        }
        /* NOTE(review): recoverd is uint32_t, so this test can never be
         * true; confirm how optimizeDataFile reports failure. */
        if (recoverd < 0) break; // failed

        pthread_mutex_lock(&bc->buffer_lock);
        bc->bytes -= recoverd;
        pthread_mutex_unlock(&bc->buffer_lock);
    }

    // update pos of items in curr_tree
    pthread_mutex_lock(&bc->write_lock);
    pthread_mutex_lock(&bc->flush_lock);
    if (i == bc->curr && ++last < bc->curr) {
        char opath[255], npath[255];
        gen_path(opath, base, DATA_FILE, bc->curr);
        if (file_exists(opath)) {
            gen_path(npath, base, DATA_FILE, last);
            if (symlink(opath, npath) != 0)
                fprintf(stderr, "symlink failed: %s -> %s\n", opath, npath);

            struct update_args args;
            args.tree = bc->tree;
            args.index = last;
            ht_visit(bc->curr_tree, update_item_pos, &args);

            unlink(npath);
            mgr_rename(opath, npath);
        }
        bc->curr = last;
    }
    pthread_mutex_unlock(&bc->flush_lock);
    pthread_mutex_unlock(&bc->write_lock);
    bc->optimize_flag = 0;
}
/*
 * Rebuild the in-memory index of `bc` from the files on disk.
 *
 * 1. Data files are renumbered so their indices are contiguous
 *    (skip_empty_file).
 * 2. The newest usable HTree snapshot is loaded, searching from the highest
 *    index down.  A snapshot is usable when the hint file of the same index
 *    exists, the snapshot is not older than that hint file, and - if
 *    bc->before is set - the snapshot was written before bc->before.  A
 *    snapshot that fails to open is reported and unlinked.  If none loads, an
 *    empty tree is created.
 * 3. Buckets are walked from 0 until the first missing data file; each file
 *    size is added to bc->bytes.  Buckets already covered by the snapshot are
 *    not scanned again.  The others are loaded from their hint file when it
 *    is usable, otherwise from the data file itself.
 * 4. When the number of buckets beyond the last snapshot exceeds
 *    SAVE_HTREE_LIMIT, a new snapshot is saved and the old one removed.
 *
 * On return bc->curr is the index of the first bucket without a data file.
 */
void bc_scan(Bitcask* bc)
{
    char path[255], hint[255];
    struct stat st, hint_st;
    int i = 0;

    skip_empty_file(bc);
    const char *base = mgr_base(bc->mgr);

    // load snapshot of htree
    for (int n = MAX_BUCKET_COUNT - 1; n >= 0; n--) {
        if (stat(gen_path(path, base, HTREE_FILE, n), &st) != 0) {
            continue;   /* no snapshot at this index */
        }
        if (stat(gen_path(hint, base, HINT_FILE, n), &hint_st) != 0) {
            continue;   /* no hint file to compare against */
        }
        if (st.st_mtime < hint_st.st_mtime) {
            continue;   /* snapshot is older than the hint file */
        }
        if (bc->before != 0 && st.st_mtime >= bc->before) {
            continue;   /* snapshot is not older than the cut-off */
        }

        bc->tree = ht_open(bc->depth, bc->pos, path);
        if (bc->tree != NULL) {
            bc->last_snapshot = n;
            break;
        }
        fprintf(stderr, "open HTree from %s failed\n", path);
        mgr_unlink(path);
    }
    if (bc->tree == NULL) {
        bc->tree = ht_new(bc->depth, bc->pos);
    }

    for (i = 0; i < MAX_BUCKET_COUNT; i++) {
        if (stat(gen_path(path, base, DATA_FILE, i), &st) != 0) {
            break;  /* first missing data file ends the scan */
        }
        bc->bytes += st.st_size;
        if (i <= bc->last_snapshot) {
            continue;   /* already contained in the loaded snapshot */
        }

        gen_path(hint, base, HINT_FILE, i);
        bool has_hint = (0 == stat(hint, &st));

        if (bc->before == 0) {
            if (has_hint) {
                scanHintFile(bc->tree, i, hint, NULL);
            } else {
                scanDataFile(bc->tree, i, path,
                             new_path(hint, bc->mgr, HINT_FILE, i));
            }
            continue;
        }

        /* With a cut-off time, the hint file is only used when it (or the
         * data file) was last modified before that time. */
        bool hint_usable = has_hint
            && (st.st_mtime < bc->before
                || (0 == stat(path, &st) && st.st_mtime < bc->before));
        if (hint_usable) {
            scanHintFile(bc->tree, i, hint, NULL);
        } else {
            scanDataFileBefore(bc->tree, i, path, bc->before);
        }
    }

    if (i - bc->last_snapshot > SAVE_HTREE_LIMIT) {
        if (ht_save(bc->tree, new_path(path, bc->mgr, HTREE_FILE, i-1)) != 0) {
            fprintf(stderr, "save HTree to %s failed\n", path);
        } else {
            /* The fresh snapshot replaces the previous one. */
            mgr_unlink(gen_path(NULL, base, HTREE_FILE, bc->last_snapshot));
            bc->last_snapshot = i-1;
        }
    }
    bc->curr = i;
}