/* Debug: validate a set of [n_clones] cloned b-trees together.
 *
 * Every clone is write-locked before validation and unlocked after.
 * Returns the result of the underlying validation op.
 */
bool oc_bpt_dbg_validate_clones_b(
    struct Oc_wu *wu_p,
    int n_clones,
    Oc_bpt_state *st_array[])
{
    int k;
    bool valid;

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_VALIDATE_CLONES, wu_p, "");

    // take a write-lock on each clone
    for (k = 0; k < n_clones; k++) {
        oc_utl_debugassert(st_array[k]->cfg_p->initialized);
        oc_utl_trk_crt_lock_write(wu_p, &st_array[k]->lock);
    }

    valid = oc_bpt_op_validate_clones_b(wu_p, n_clones, st_array);

    // release every clone lock
    for (k = 0; k < n_clones; k++) {
        oc_utl_trk_crt_unlock(wu_p, &st_array[k]->lock);
    }

    return valid;
}
/* Delete the sub-tree rooted at [node_p].
 *
 * Recursive descent: when this node is referenced by exactly one
 * snapshot, first recurse into each child (for index nodes) so the
 * whole sub-tree below it is released on the way back up. In all
 * cases the node's own ref-count is then reduced via oc_bpt_nd_delete,
 * which performs the user-defined "data_release" for leaf data.
 */
void oc_bpt_utl_delete_subtree_b(
    struct Oc_wu *wu_p,
    struct Oc_bpt_state *s_p,
    Oc_bpt_node *node_p)
{
    int refcnt = s_p->cfg_p->fs_get_refcount(wu_p, node_p->disk_addr);

    oc_utl_debugassert(refcnt > 0);

    if (1 == refcnt && !oc_bpt_nd_is_leaf(s_p, node_p)) {
        /* Sole reference to an index node: recurse through all of
         * its children before touching the node itself. */
        int k;
        int n_ent = oc_bpt_nd_num_entries(s_p, node_p);

        for (k = 0; k < n_ent; k++) {
            struct Oc_bpt_key *unused_key_p;
            uint64 child_addr;
            Oc_bpt_node *child_p;

            oc_bpt_nd_index_get_kth(s_p, node_p, k,
                                    &unused_key_p, &child_addr);
            child_p = oc_bpt_nd_get_for_read(wu_p, s_p, child_addr);
            oc_bpt_utl_delete_subtree_b(wu_p, s_p, child_p);
        }
    }

    // reduce the ref-count on this node
    oc_bpt_nd_delete(wu_p, s_p, node_p);
}
/* Insert [length] consecutive (key, data) pairs into the tree.
 *
 * Returns the count reported by the underlying insert-range op
 * (0 immediately for an empty request).
 */
int oc_bpt_insert_range_b(
    struct Oc_wu *wu_p,
    Oc_bpt_state *s_p,
    int length,
    struct Oc_bpt_key *key_array,
    struct Oc_bpt_data *data_array)
{
    int n_done;

    // nothing to do for an empty range
    if (0 == length)
        return 0;

    oc_bpt_trace_wu_lvl(
        2, OC_EV_BPT_INSERT_RANGE, wu_p, "tid=%Lu [%s]",
        s_p->tid,
        oc_bpt_nd_string_of_2key(
            s_p,
            oc_bpt_nd_key_array_kth(s_p, key_array, 0),
            oc_bpt_nd_key_array_kth(s_p, key_array, length-1)));
    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_utl_trk_crt_lock_read(wu_p, &s_p->lock);
    n_done = oc_bpt_op_insert_range_b(wu_p, s_p, length,
                                      key_array, data_array);
    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);

    oc_bpt_trace_wu_lvl(3, OC_EV_BPT_INSERT_RANGE, wu_p, "rc=%d", n_done);
    return n_done;
}
/* Lookup all keys in [min_key_p, max_key_p], copying up to
 * [max_num_keys_i] results into the output arrays. The number of
 * keys found is written to [*nkeys_found_po].
 */
void oc_bpt_lookup_range_b(
    struct Oc_wu *wu_p,
    Oc_bpt_state *s_p,
    struct Oc_bpt_key *min_key_p,
    struct Oc_bpt_key *max_key_p,
    int max_num_keys_i,
    struct Oc_bpt_key *key_array_po,
    struct Oc_bpt_data *data_array_po,
    int *nkeys_found_po)
{
    // There are too many arguments, we stuff them into a single structure
    struct Oc_bpt_op_lookup_range args;

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_LOOKUP_RANGE, wu_p, "tid=%Lu [%s]",
                        s_p->tid,
                        oc_bpt_nd_string_of_2key(s_p, min_key_p, max_key_p));
    oc_utl_debugassert(s_p->cfg_p->initialized);

    args.min_key_p = min_key_p;
    args.max_key_p = max_key_p;
    args.max_num_keys_i = max_num_keys_i;
    args.key_array_po = key_array_po;
    args.data_array_po = data_array_po;
    args.nkeys_found_po = nkeys_found_po;

    oc_utl_trk_crt_lock_read(wu_p, &s_p->lock);
    oc_bpt_op_lookup_range_b(wu_p, s_p, &args);
    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);
}
/// Create a b-tree whose root is in address [addr] void oc_bpt_init_map( struct Oc_wu *wu_p, Oc_bpt_cfg *cfg_p, uint64 addr) { oc_utl_debugassert(cfg_p->initialized); oc_bpt_nd_init_map(wu_p, cfg_p, addr); }
/* Gather b-tree statistics.
 *
 * The results are collected into a stack-local Oc_bpt_statistics;
 * nothing is returned to the caller (the underlying op presumably
 * reports them, e.g. via tracing — confirm against its definition).
 *
 * Fix: the original body was "return oc_bpt_op_statistics_b(...)" —
 * a return statement with an expression inside a void function is a
 * constraint violation (C99 6.8.6.4). The call is kept, the bogus
 * `return` keyword is dropped.
 */
void oc_bpt_statistics_b(
    struct Oc_wu *wu_p,
    Oc_bpt_state *s_p)
{
    Oc_bpt_statistics statistics;

    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_bpt_op_statistics_b(wu_p, s_p, &statistics);
}
/* Tear down a b-tree state: drop the reference held on the root
 * node and clear the root pointer. The root must be non-NULL.
 */
void oc_bpt_destroy_state(struct Oc_wu *wu_pi, Oc_bpt_state *s_p)
{
    Oc_bpt_node *root_p = s_p->root_node_p;

    oc_bpt_trace_wu_lvl(3, OC_EV_BPT_DESTROY_STATE, wu_pi,
                        "root_node=%Lu", root_p);
    oc_utl_debugassert(root_p);

    // release the root node
    s_p->cfg_p->node_release(wu_pi, root_p);
    s_p->root_node_p = NULL;
}
/* Debug: emit a dot-format dump of the tree tagged with [tag_p].
 * The tree is held under a write-lock for the duration of the dump.
 */
void oc_bpt_dbg_output_b(
    struct Oc_wu *wu_p,
    struct Oc_bpt_state *s_p,
    char *tag_p)
{
    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_utl_trk_crt_lock_write(wu_p, &s_p->lock);
    oc_bpt_op_output_dot_b(wu_p, s_p, tag_p);
    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);
}
/* Clone the tree in [src_p] into the empty target state [trg_p].
 *
 * Both states must share the same configuration and the target must
 * not yet have a root. Returns the disk address of the new root.
 */
uint64 oc_bpt_clone_b(
    struct Oc_wu *wu_p,
    Oc_bpt_state *src_p,
    Oc_bpt_state *trg_p)
{
    // make sure the configurations are equivalent
    oc_utl_assert(trg_p->cfg_p == src_p->cfg_p);

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_CLONE, wu_p, "tid=%Lu -> tid=%Lu",
                        src_p->tid, trg_p->tid);
    oc_utl_debugassert(src_p->cfg_p->initialized);
    oc_utl_debugassert(trg_p->cfg_p->initialized);
    // the target tree must start out empty
    oc_utl_assert(NULL == trg_p->root_node_p);

    oc_utl_trk_crt_lock_write(wu_p, &src_p->lock);
    oc_bpt_nd_clone_root(wu_p, src_p, trg_p);
    oc_utl_trk_crt_unlock(wu_p, &src_p->lock);

    return trg_p->root_node_p->disk_addr;
}
/* Lookup all extents intersecting [min_key_p, max_key_p] and copy
 * them into the output arrays in [lkr_p], up to [max_num_keys_i]
 * entries. [*nx_found_po] receives the total found.
 *
 * Iterates mini_lookup_b, advancing a cursor past the last extent
 * found after each pass, until the range or the output capacity is
 * exhausted.
 */
void oc_xt_op_lookup_range_b(
    struct Oc_wu *wu_p,
    Oc_xt_state *s_p,
    Oc_xt_op_lookup_range *lkr_p )
{
    bool rc;
    struct Oc_xt_key *cursor_p;

    *lkr_p->nx_found_po = 0;

    /* Empty request, or an empty range. NOTE(review): key_compare
     * returning -1 here presumably means min > max — this matches the
     * loop-exit comparison below; confirm against the cfg's
     * key_compare contract. */
    if (0 == lkr_p->max_num_keys_i ||
        s_p->cfg_p->key_compare(lkr_p->min_key_p, lkr_p->max_key_p) == -1)
        return;

    // an empty tree has nothing to return
    if (oc_xt_nd_num_entries(s_p, s_p->root_node_p) == 0) {
        return;
    }

    /* [cursor_p] tracks the minimal key still to be searched for;
     * it starts as a copy of the caller's minimum key. */
    cursor_p = (struct Oc_xt_key*)alloca(s_p->cfg_p->key_size);
    memcpy((char*)cursor_p, (char*)lkr_p->min_key_p, s_p->cfg_p->key_size);

    while (*lkr_p->nx_found_po < lkr_p->max_num_keys_i) {
        struct Oc_xt_key *max_key_so_far_p;
        struct Oc_xt_rcrd *max_rcrd_so_far_p;

        rc = mini_lookup_b(wu_p, s_p, lkr_p);
        if (!rc)
            // no more keys found in the range, we're done
            return;

        /* Update the minimal key searched for.
         * After the first search we move the minimal-key
         * forward.
         */
        oc_utl_debugassert(*lkr_p->nx_found_po > 0);
        max_key_so_far_p = oc_xt_nd_key_array_kth(
            s_p, lkr_p->key_array_po, *lkr_p->nx_found_po - 1);
        max_rcrd_so_far_p = oc_xt_nd_rcrd_array_kth(
            s_p, lkr_p->rcrd_array_po, *lkr_p->nx_found_po - 1);
        // cursor = end offset of the last extent found, then +1 key
        s_p->cfg_p->rcrd_end_offset(max_key_so_far_p, max_rcrd_so_far_p,
                                    cursor_p);
        s_p->cfg_p->key_inc(cursor_p, cursor_p);

        if (s_p->cfg_p->key_compare(cursor_p, lkr_p->max_key_p) == -1)
            // We have found all there is to find
            return;

        // continue the search just past what was already returned
        lkr_p->min_key_p = cursor_p;
    }
}
/* Debug: validate a single b-tree under a write-lock.
 * Returns the validation verdict.
 */
bool oc_bpt_dbg_validate_b(
    struct Oc_wu *wu_p,
    Oc_bpt_state *s_p)
{
    bool valid;

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_VALIDATE, wu_p, "tid=%Lu", s_p->tid);
    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_utl_trk_crt_lock_write(wu_p, &s_p->lock);
    valid = oc_bpt_op_validate_b(wu_p, s_p);
    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);

    return valid;
}
/* Fetch the node at [addr] and acquire its lock for writing.
 *
 * Retry loop: between fetching the node and taking the lock the node
 * may have been relocated; when its disk address no longer matches
 * [addr], the node is released and the fetch is retried.
 */
static Oc_bpt_node* node_get_xl(Oc_wu *wu_p, uint64 addr)
{
    for (;;) {
        Oc_bpt_node *nd_p = node_get(wu_p, addr);

        oc_utl_trk_crt_lock_write(wu_p, &nd_p->lock);
        if (addr == nd_p->disk_addr) {
            oc_utl_debugassert(addr == nd_p->disk_addr);
            return nd_p;
        }

        // stale node: the address changed under us, drop it and retry
        node_release(wu_p, nd_p);
    }
}
/* Remove [key_p] from the tree.
 * Returns the result of the underlying remove op.
 */
bool oc_bpt_remove_key_b(
    struct Oc_wu *wu_p,
    struct Oc_bpt_state *s_p,
    struct Oc_bpt_key *key_p)
{
    bool removed;

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_REMOVE_KEY, wu_p, "tid=%Lu %s",
                        s_p->tid, oc_bpt_nd_string_of_key(s_p, key_p));
    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_utl_trk_crt_lock_read(wu_p, &s_p->lock);
    removed = oc_bpt_op_remove_key_b(wu_p, s_p, key_p);
    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);

    return removed;
}
/* Create a new empty b-tree: allocate the root node and format it.
 *
 * The state must not already own a root. Returns the new root's
 * disk address; [s_p] keeps the reference to the node.
 */
uint64 oc_bpt_create_b(
    struct Oc_wu *wu_p,
    Oc_bpt_state *s_p)
{
    Oc_bpt_node *root_p;

    oc_utl_assert(NULL == s_p->root_node_p);
    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_utl_trk_crt_lock_write(wu_p, &s_p->lock);

    root_p = s_p->cfg_p->node_alloc(wu_p);
    s_p->root_node_p = root_p;
    oc_bpt_nd_create_root(wu_p, s_p, root_p);
    // drop the page-lock on the fresh root (presumably taken by
    // node_alloc — confirm against its contract)
    oc_utl_trk_crt_unlock(wu_p, &root_p->lock);

    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);

    return root_p->disk_addr;
}
/* Lookup a single key; on success the data is copied into [data_po].
 * Returns the result of the underlying lookup op.
 */
bool oc_bpt_lookup_key_b(
    struct Oc_wu *wu_p,
    struct Oc_bpt_state *s_p,
    struct Oc_bpt_key *key_p,
    struct Oc_bpt_data *data_po)
{
    bool found;

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_LOOKUP_KEY, wu_p, "tid=%Lu key=%s",
                        s_p->tid, oc_bpt_nd_string_of_key(s_p, key_p));
    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_utl_trk_crt_lock_read(wu_p, &s_p->lock);
    found = oc_bpt_op_lookup_b(wu_p, s_p, key_p, data_po);
    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);

    return found;
}
/* Remove all keys in [min_key_p, max_key_p] under a write-lock.
 * Returns the count reported by the underlying remove-range op.
 */
int oc_bpt_remove_range_b(
    struct Oc_wu *wu_p,
    Oc_bpt_state *s_p,
    struct Oc_bpt_key *min_key_p,
    struct Oc_bpt_key *max_key_p)
{
    int n_removed;

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_REMOVE_RANGE, wu_p, "tid=%Lu [%s]",
                        s_p->tid,
                        oc_bpt_nd_string_of_2key(s_p, min_key_p, max_key_p));
    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_utl_trk_crt_lock_write(wu_p, &s_p->lock);
    n_removed = oc_bpt_op_remove_range_b(wu_p, s_p, min_key_p, max_key_p);
    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);

    return n_removed;
}
/* Delete the whole tree rooted in [s_p]; on return the state's root
 * pointer is NULL. The tree lock is held for writing throughout.
 */
void oc_bpt_delete_b(
    struct Oc_wu *wu_p,
    struct Oc_bpt_state *s_p)
{
    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_DELETE, wu_p, "tid=%Lu", s_p->tid);
    oc_utl_debugassert(s_p->cfg_p->initialized);

    oc_utl_trk_crt_lock_write(wu_p, &s_p->lock);

    /* We need to upgrade the root-lock to a shared-lock
     * because the delete-code unlocks all the pages after
     * deleting them. This means that the root cannot remain
     * unlocked, as usual.
     *
     * NOTE(review): the root read-lock taken here has no matching
     * unlock in this function — presumably the delete path below
     * releases it when the root page is deleted; verify.
     */
    oc_utl_trk_crt_lock_read(wu_p, &s_p->root_node_p->lock);
    oc_bpt_utl_delete_subtree_b(wu_p, s_p, s_p->root_node_p);
    s_p->root_node_p = NULL;

    oc_utl_trk_crt_unlock(wu_p, &s_p->lock);
}
/* Copy-on-write the root node of the tree in [s_p] and, if its disk
 * address changed as a result, write the new address back into the
 * father tree's data entry.
 *
 * [father_data_p] holds the root's disk address as stored in the
 * father; [size] is the byte size of that entry.
 */
void oc_bpt_cow_root_and_update_b(
    struct Oc_wu *wu_p,
    struct Oc_bpt_state *s_p,
    struct Oc_bpt_data *father_data_p,
    int size)
{
    // Old disk-addr stored as data in father
    uint64 prev_addr = *((uint64*)father_data_p);
    Oc_bpt_node *node_p;
    int fs_refcnt;

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_COW_ROOT_AND_UPDATE, wu_p,
                        "lba of root as appers in father (before update):%llu",
                        prev_addr);
    oc_utl_assert(NULL != s_p->root_node_p);

    oc_utl_trk_crt_lock_write(wu_p, &s_p->lock);

    // take the root page with an exclusive lock
    node_p = s_p->cfg_p->node_get_xl(wu_p, s_p->root_node_p->disk_addr);
    oc_utl_debugassert(node_p == s_p->root_node_p);

    fs_refcnt = s_p->cfg_p->fs_get_refcount(
        wu_p, s_p->root_node_p->disk_addr);
    /* Mark dirty; when the refcount exceeds 1 the flag presumably
     * forces the actual copy-on-write (new disk address) — confirm
     * against node_mark_dirty's contract. */
    s_p->cfg_p->node_mark_dirty(wu_p, s_p->root_node_p, (fs_refcnt > 1));

    if (s_p->root_node_p->disk_addr != prev_addr) {
        // the root moved; propagate the new address into the father entry
        uint64 new_addr = s_p->root_node_p->disk_addr;
        memcpy((char*)father_data_p, &new_addr, size);
    }

    oc_bpt_trace_wu_lvl(2, OC_EV_BPT_COW_ROOT_AND_UPDATE, wu_p,
                        "lba of root as appers in father (AFTER update):%llu",
                        *((uint64*)father_data_p));

    oc_bpt_nd_release(wu_p, s_p, s_p->root_node_p);// Dalit: may not be needed
    oc_utl_trk_crt_unlock(wu_p, &s_p->lock); // Dalit: may not be needed
}
/* Debug: emit a dot-format dump covering [n_clones] cloned trees.
 * Every clone is write-locked during the dump and unlocked after.
 */
void oc_bpt_dbg_output_clones_b(
    struct Oc_wu *wu_p,
    int n_clones,
    struct Oc_bpt_state *st_array[],
    char *tag_p)
{
    int k;

    // take a write-lock on each clone
    for (k = 0; k < n_clones; k++) {
        oc_utl_debugassert(st_array[k]->cfg_p->initialized);
        oc_utl_trk_crt_lock_write(wu_p, &st_array[k]->lock);
    }

    oc_bpt_op_output_clones_dot_b(wu_p, n_clones, st_array, tag_p);

    // release every clone lock
    for (k = 0; k < n_clones; k++) {
        oc_utl_trk_crt_unlock(wu_p, &st_array[k]->lock);
    }
}
/* Test-only check: assert that the test file-system context exists
 * and that exactly [num_blocks] blocks are recorded as allocated.
 *
 * Fix: the parameter was declared without a type, relying on the
 * implicit-int rule that was removed in C99 — give it an explicit
 * `int`, matching its use in the comparison below.
 */
void oc_bpt_test_fs_verify(int num_blocks)
{
    oc_utl_debugassert(ctx_p);
    oc_utl_assert(num_blocks == ctx_p->tot_alloc);
}
/* search in [node_p] for keys in the range.
 * copy extents into the output arrays.
 * update the total count of keys found [nx_found_po].
 *
 * return TRUE if any entries were found. Return FALSE otherwise.
 *
 * note: the caller may specify if the minimum-key is included or not.
 */
static bool search_in_leaf(
    struct Oc_wu *wu_p,
    Oc_xt_state *s_p,
    Oc_xt_node *node_p,
    Oc_xt_op_lookup_range *lkr_p )
{
    int loc_lo, loc_hi, i, cursor_keys, cursor_rcrd;
    struct Oc_xt_key *key_p;
    struct Oc_xt_rcrd *rcrd_p;

    oc_utl_debugassert(oc_xt_nd_is_leaf(s_p, node_p));
    // caller guarantees there is still room in the output arrays
    oc_utl_debugassert(*(lkr_p->nx_found_po) < lkr_p->max_num_keys_i);
    oc_xt_trace_wu_lvl(
        3, OC_EV_XT_LOOKUP_RNG_SEARCH_LEAF, wu_p,
        "leaf=[%s] range=[%s] found_so_far=%d",
        oc_xt_nd_string_of_node(s_p, node_p),
        oc_xt_nd_string_of_2key(s_p, lkr_p->min_key_p, lkr_p->max_key_p),
        *lkr_p->nx_found_po);

    // find the first extent that is greater or equal than [min_key_p]
    loc_lo = oc_xt_nd_leaf_lookup_ge_key(wu_p, s_p, node_p, lkr_p->min_key_p);
    if (-1 == loc_lo)
        // nothing matching
        return FALSE;

    // find the first extent that is smaller or equal than [max_key_p]
    loc_hi = oc_xt_nd_leaf_lookup_le_key(wu_p, s_p, node_p, lkr_p->max_key_p);
    if (-1 == loc_hi)
        // nothing matching
        return FALSE;

    // make sure that there is something in the range.
    if (loc_lo > loc_hi)
        return FALSE;

    oc_utl_debugassert(loc_lo < oc_xt_nd_num_entries(s_p, node_p));
    oc_utl_debugassert(0 <= loc_lo);
    oc_utl_debugassert(loc_hi < oc_xt_nd_num_entries(s_p, node_p));

    oc_xt_trace_wu_lvl(3, OC_EV_XT_LOOKUP_RNG, wu_p,
                       "loc_lo=%d loc_hi=%d sum=%d",
                       loc_lo, loc_hi, loc_hi-loc_lo+1);

    // Copy all entries between [loc_lo] and [loc_hi]

    /* The first and last extents may be a partial match, they require
     * special attention.
     */
    oc_xt_nd_leaf_get_kth(s_p, node_p, loc_lo, &key_p, &rcrd_p);
    copy_partial_ext(wu_p, s_p,
                     key_p, rcrd_p,
                     lkr_p->min_key_p, lkr_p->max_key_p,
                     lkr_p);

    // return early if we found only a single extent
    if (loc_lo == loc_hi)
        return TRUE;

    /* copy the set of middle extents, they are completely between lo-key and
     * hi-key.
     */
    // byte offsets of the next free slot in each output array
    cursor_keys = (*lkr_p->nx_found_po) * s_p->cfg_p->key_size;
    cursor_rcrd = (*lkr_p->nx_found_po) * s_p->cfg_p->rcrd_size;
    for (i=loc_lo+1;
         i<=loc_hi-1 && *lkr_p->nx_found_po < lkr_p->max_num_keys_i;
         i++) {
        // get the next entry in the page
        oc_xt_nd_leaf_get_kth(s_p, node_p, i, &key_p, &rcrd_p);

        // whole extent lies inside the range: raw copy into the outputs
        memcpy((char*)lkr_p->key_array_po + cursor_keys,
               (char*)key_p, s_p->cfg_p->key_size);
        memcpy((char*)lkr_p->rcrd_array_po + cursor_rcrd,
               (char*)rcrd_p, s_p->cfg_p->rcrd_size);

        *lkr_p->nx_found_po = *lkr_p->nx_found_po + 1;
        cursor_keys += s_p->cfg_p->key_size;
        cursor_rcrd += s_p->cfg_p->rcrd_size;
    }

    // copy the last extent
    /* NOTE(review): this final copy is not guarded by max_num_keys_i;
     * presumably copy_partial_ext enforces the output bound internally
     * — verify against its definition. */
    oc_xt_nd_leaf_get_kth(s_p, node_p, loc_hi, &key_p, &rcrd_p);
    copy_partial_ext(wu_p, s_p,
                     key_p, rcrd_p,
                     lkr_p->min_key_p, lkr_p->max_key_p,
                     lkr_p);

    return TRUE;
}