// Create a new red-black tree. as_index_tree * as_index_tree_create(cf_arenax *arena, as_index_value_destructor destructor, void *destructor_udata, as_treex *p_treex) { as_index_tree *tree = cf_rc_alloc(sizeof(as_index_tree)); if (! tree) { return NULL; } pthread_mutex_init(&tree->lock, NULL); pthread_mutex_init(&tree->reduce_lock, NULL); tree->arena = arena; // Make the sentinel element. tree->sentinel_h = cf_arenax_alloc(arena); if (tree->sentinel_h == 0) { cf_rc_free(tree); return NULL; } as_index *sentinel = RESOLVE_H(tree->sentinel_h); memset(sentinel, 0, sizeof(as_index)); sentinel->left_h = sentinel->right_h = tree->sentinel_h; sentinel->color = AS_BLACK; // Make the fixed root element. tree->root_h = cf_arenax_alloc(arena); if (tree->root_h == 0) { cf_arenax_free(arena, tree->sentinel_h); cf_rc_free(tree); return NULL; } tree->root = RESOLVE_H(tree->root_h); memset(tree->root, 0, sizeof(as_index)); tree->root->left_h = tree->root->right_h = tree->sentinel_h; tree->root->color = AS_BLACK; tree->destructor = destructor; tree->destructor_udata = destructor_udata; tree->elements = 0; if (p_treex) { // Update the tree information in persistent memory. p_treex->sentinel_h = tree->sentinel_h; p_treex->root_h = tree->root_h; } return tree; }
// Destroy a red-black tree; return 0 if the tree was destroyed or 1 otherwise. // TODO - nobody cares about the return value, make it void? int as_index_tree_release(as_index_tree *tree, void *destructor_udata) { if (0 != cf_rc_release(tree)) { return 1; } as_index_tree_purge(tree, RESOLVE_H(tree->root->left_h), tree->root->left_h); cf_arenax_free(tree->arena, tree->root_h); cf_arenax_free(tree->arena, tree->sentinel_h); pthread_mutex_destroy(&tree->lock); pthread_mutex_destroy(&tree->reduce_lock); memset(tree, 0, sizeof(as_index_tree)); // paranoia - for debugging only cf_rc_free(tree); return 0; }
// Done with record - unlock. If record was removed from tree and is not // reserved (by reduce), destroy record and free arena element. void as_record_done(as_index_ref *r_ref, as_namespace *ns) { as_record *r = r_ref->r; if (! as_index_is_valid_record(r) && r->rc == 0) { as_record_destroy(r, ns); cf_arenax_free(ns->arena, r_ref->r_h, r_ref->puddle); } cf_mutex_unlock(r_ref->olock); }
/*
 * Create a tree "stub" for the storage-has-index (KV) case.
 *
 * Allocates an arena element for the given digest, initializes it as a
 * detached node, takes the record lock (unless index_ref->skip_lock), and
 * checks whether the record exists in storage. If it does not exist and
 * create_p is false, everything is rolled back.
 *
 * Returns:  1 = new
 *           0 = success (found)
 *          -1 = fail (arena allocation failed)
 */
int
as_index_ref_initialize(as_index_tree *tree, cf_digest *key,
		as_index_ref *index_ref, bool create_p, as_namespace *ns)
{
	// Allocate memory for the new node and set the node parameters.
	cf_arenax_handle n_h = cf_arenax_alloc(tree->arena);

	if (n_h == 0) {
		return -1;
	}

	as_index *n = RESOLVE_H(n_h);

	// Detached red node - children and parent all point at the sentinel.
	n->key = *key;
	n->rc = 1;
	n->left_h = n->right_h = tree->sentinel_h;
	n->color = AS_RED;
	n->parent_h = tree->sentinel_h;

	if (AS_STORAGE_ENGINE_KV == ns->storage_type) {
		// Careful here - this is now unsigned.
		n->storage_key.kv.file_id = STORAGE_INVALID_FILE_ID;
	}
	else {
		cf_crash(AS_INDEX, "non-KV storage type ns %s key %p", ns->name, key);
	}

	index_ref->r = n;
	index_ref->r_h = n_h;

	if (! index_ref->skip_lock) {
		olock_vlock(g_config.record_locks, key, &(index_ref->olock));
		cf_atomic_int_incr(&g_config.global_record_lock_count);
	}

	as_index_reserve(n);
	cf_atomic_int_add(&g_config.global_record_ref_count, 2);

	// rv is 1 ("new") when the record is not in storage, 0 when found.
	int rv = ! as_storage_record_exists(ns, key);

	// Roll back if not found and we're not creating it.
	if (rv && ! create_p) {
		if (! index_ref->skip_lock) {
			// NOTE(review): unlocks with pthread_mutex_unlock while
			// as_record_done() uses cf_mutex_unlock - confirm olock's type.
			pthread_mutex_unlock(index_ref->olock);
			cf_atomic_int_decr(&g_config.global_record_lock_count);
		}

		as_index_release(n);
		cf_atomic_int_decr(&g_config.global_record_ref_count);
		cf_arenax_free(tree->arena, n_h);

		index_ref->r = NULL;
		index_ref->r_h = 0;
	}

	return rv;
}
// Recursively release every element in the subtree rooted at r / r_h,
// destroying and freeing any element whose refcount drops to zero.
void
as_index_tree_purge(as_index_tree *tree, as_index *r, cf_arenax_handle r_h)
{
	// The sentinel is shared by the whole tree - don't purge it.
	if (r_h == tree->sentinel_h) {
		return;
	}

	cf_arenax_handle left_h = r->left_h;
	cf_arenax_handle right_h = r->right_h;

	as_index_tree_purge(tree, RESOLVE_H(left_h), left_h);
	as_index_tree_purge(tree, RESOLVE_H(right_h), right_h);

	if (as_index_release(r) == 0) {
		// Last reference - destroy and reclaim the arena element.
		if (tree->destructor) {
			tree->destructor(r, tree->destructor_udata);
		}

		cf_arenax_free(tree->arena, r_h);
	}

	cf_atomic_int_decr(&g_config.global_record_ref_count);
}
// If there's an element with specified digest in the tree, delete it.
//
// Returns:
//		 0 - found and deleted
//		-1 - not found
// TODO - nobody cares about the return value, make it void?
int
as_index_delete(as_index_tree *tree, cf_digest *keyd)
{
	as_index *r;
	cf_arenax_handle r_h;
	bool retry;

	// Save parents as we search for the specified element (or its successor).
	// NOTE(review): the (64 * 2) + 3 bound presumably covers the maximum
	// possible red-black tree depth for digest keys plus the fixed root and
	// successor-walk slots - confirm against the insert-side bound.
	as_index_ele eles[(64 * 2) + 3];
	as_index_ele *ele;

	do {
		ele = eles;

		pthread_mutex_lock(&tree->lock);

		// Start the parent chain at the fixed root.
		ele->parent = NULL; // we'll never look this far up
		ele->me_h = tree->root_h;
		ele->me = tree->root;

		r_h = tree->root->left_h;
		r = RESOLVE_H(r_h);

		// Standard binary search down from the root, recording the path so
		// rebalancing can walk back up without parent pointers.
		while (r_h != tree->sentinel_h) {
			ele++;
			ele->parent = ele - 1;
			ele->me_h = r_h;
			ele->me = r;

			int cmp = cf_digest_compare(keyd, &r->key);

			if (cmp == 0) {
				break; // found, we'll be deleting it
			}

			// NOTE(review): cmp > 0 descends left - the tree is apparently
			// ordered descending by digest; confirm this matches the insert
			// path's comparison direction.
			r_h = cmp > 0 ? r->left_h : r->right_h;
			r = RESOLVE_H(r_h);
		}

		if (r_h == tree->sentinel_h) {
			pthread_mutex_unlock(&tree->lock);
			return -1; // not found, nothing to delete
		}

		// We found the tree element, so we'll be deleting it.

		retry = false;

		if (EBUSY == pthread_mutex_trylock(&tree->reduce_lock)) {
			// The tree is being reduced - could take long, unlock so reads and
			// overwrites aren't blocked.
			pthread_mutex_unlock(&tree->lock);

			// Wait until the tree reduce is done...
			pthread_mutex_lock(&tree->reduce_lock);
			pthread_mutex_unlock(&tree->reduce_lock);

			// ... and start over - we unlocked, so the tree may have changed.
			retry = true;
		}
	} while (retry);

	// Delete the element.
	// Both tree->lock and tree->reduce_lock are held from here on.

	// Snapshot the element to delete, r. (Already have r_h and r shortcuts.)
	as_index_ele *r_e = ele;

	if (r->left_h != tree->sentinel_h && r->right_h != tree->sentinel_h) {
		// r has two children - search down for a "successor" (the leftmost
		// node of r's right subtree), extending the recorded path...
		ele++;
		ele->parent = ele - 1;
		ele->me_h = r->right_h;
		ele->me = RESOLVE_H(ele->me_h);

		while (ele->me->left_h != tree->sentinel_h) {
			ele++;
			ele->parent = ele - 1;
			ele->me_h = ele->parent->me->left_h;
			ele->me = RESOLVE_H(ele->me_h);
		}
	}
	// else - ele is left at r, i.e. s == r.

	// Snapshot the successor, s. (Note - s could be r.)
	as_index_ele *s_e = ele;
	cf_arenax_handle s_h = s_e->me_h;
	as_index *s = s_e->me;

	// Get the appropriate child of s. (Note - child could be sentinel.)
	// s has at most one non-sentinel child by construction.
	ele++;

	if (s->left_h == tree->sentinel_h) {
		ele->me_h = s->right_h;
	}
	else {
		ele->me_h = s->left_h;
	}

	ele->me = RESOLVE_H(ele->me_h);

	// Cut s (remember, it could be r) out of the tree - splice s's child
	// into s's place under s's parent.
	ele->parent = s_e->parent;

	if (s_h == s_e->parent->me->left_h) {
		s_e->parent->me->left_h = ele->me_h;
	}
	else {
		s_e->parent->me->right_h = ele->me_h;
	}

	// Rebalance at ele if necessary. (Note - if r != s, r is in the tree, and
	// its parent may change during rebalancing.)
	// Removing a black node may violate the black-height invariant.
	if (s->color == AS_BLACK) {
		as_index_delete_rebalance(tree, ele);
	}

	if (s != r) {
		// s was a successor distinct from r, put it in r's place in the tree.
		// (s adopts r's children, color, and position under r's parent.)
		s->left_h = r->left_h;
		s->right_h = r->right_h;
		s->color = r->color;

		if (r_h == r_e->parent->me->left_h) {
			r_e->parent->me->left_h = s_h;
		}
		else {
			r_e->parent->me->right_h = s_h;
		}
	}

	// We may now destroy r, which is no longer in the tree.
	if (0 == as_index_release(r)) {
		if (tree->destructor) {
			tree->destructor(r, tree->destructor_udata);
		}

		cf_arenax_free(tree->arena, r_h);
	}

	cf_atomic_int_decr(&g_config.global_record_ref_count);

	tree->elements--;

	pthread_mutex_unlock(&tree->reduce_lock);
	pthread_mutex_unlock(&tree->lock);

	return 0;
}