rc_t btree_impl::_ux_assure_fence_low_entry(btree_page_h &leaf) {
    w_assert1(leaf.is_fixed());
    w_assert1(leaf.latch_mode() == LATCH_EX);
    if (!leaf.is_leaf()) {
        // locks are taken only for leaf-page entries. this case isn't an issue
        return RCOK;
    }
    w_keystr_t fence_low;
    leaf.copy_fence_low_key(fence_low);
    bool needs_to_create = false;
    if (leaf.nrecs() == 0) {
        if (leaf.compare_with_fence_high(fence_low) == 0) {
            // low==high happens only during page split. In that case, no one can have a lock
            // in the page being created. No need to assure the record.
            return RCOK;
        }
        needs_to_create = true;
    } else {
        w_keystr_t first_key;
        leaf.get_key(0, first_key);
        w_assert1(fence_low.compare(first_key) <= 0); // can't be fence_low > first_key
        if (fence_low.compare(first_key) < 0) {
            // fence-low doesn't exist as an entry!
            needs_to_create = true;
        }
    }
    if (needs_to_create) {
        W_DO(_sx_reserve_ghost(leaf, fence_low, 0)); // no data is needed
    }
    return RCOK;
}
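// A minimal standalone sketch (not the storage manager's API) of the decision made above,
// assuming a sorted std::vector<std::string> stands in for the leaf's record keys: the
// fence-low key must be materialized as a (ghost) entry whenever the page is not a
// transient empty split target and its smallest record key is strictly greater than
// fence-low, so that a lock taken on fence-low has a record to attach to.
#include <string>
#include <vector>

bool fence_low_needs_ghost(const std::vector<std::string>& keys,   // sorted record keys
                           const std::string& fence_low,
                           const std::string& fence_high) {
    if (keys.empty()) {
        // low == high only occurs transiently during a split; nobody can hold a lock there
        return fence_low != fence_high;
    }
    // keys are sorted, so keys.front() is the smallest record key on the page
    return fence_low < keys.front();
}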
rc_t btree_impl::_ux_lock_range(const StoreID& stid, btree_page_h& leaf,
                                const void* keystr, size_t keylen,
                                slotid_t slot, latch_mode_t latch_mode,
                                const okvl_mode& exact_hit_lock_mode,
                                const okvl_mode& miss_lock_mode,
                                bool check_only) {
    w_assert1(slot >= -1 && slot <= leaf.nrecs());
    w_assert1(exact_hit_lock_mode.get_gap_mode() == okvl_mode::N);
    w_assert1(miss_lock_mode.is_keylock_empty());

    if (slot == -1) {
        // this means we should search it again
        bool found;
        leaf.search((const char*) keystr, keylen, found, slot);
        w_assert1(!found); // precondition
    }
    w_assert1(slot >= 0 && slot <= leaf.nrecs());

#if W_DEBUG_LEVEL > 1
    w_keystr_t key, key_at_slot;
    key.construct_from_keystr(keystr, keylen);
    if (slot < leaf.nrecs()) {
        leaf.get_key(slot, key_at_slot);
        w_assert1(key_at_slot.compare(key) > 0);
    }
#endif // W_DEBUG_LEVEL > 1

    slot--; // want range lock from previous key
    if (slot == -1 &&
        w_keystr_t::compare_bin_str(keystr, keylen,
                                    leaf.get_fence_low_key(), leaf.get_fence_low_length()) == 0) {
        // We were searching for the low-fence key! then, we take key lock on it and
        // subsequent structural modification (e.g., merge) will add the low-fence as
        // ghost record to be aware of the lock.
        W_DO (_ux_lock_key(stid, leaf,
                           leaf.get_fence_low_key(), leaf.get_fence_low_length(),
                           latch_mode, exact_hit_lock_mode, check_only));
    } else {
        w_keystr_t prevkey;
        if (slot == -1) {
            leaf.copy_fence_low_key(prevkey);
        } else {
            leaf.get_key(slot, prevkey);
        }
#if W_DEBUG_LEVEL > 1
        w_assert1(prevkey.compare(key) < 0);
#endif // W_DEBUG_LEVEL > 1
        W_DO (_ux_lock_key(stid, leaf, prevkey, latch_mode, miss_lock_mode, check_only));
    }
    return RCOK;
}
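// A minimal standalone sketch (not the storage manager's API) of how the gap-protecting
// lock target is chosen above, assuming a sorted std::vector<std::string> stands in for
// the leaf page: when the probe key has no exact match, the range lock goes on the
// closest existing key smaller than the probe, falling back to the fence-low key when no
// smaller record key exists on the page.
#include <algorithm>
#include <string>
#include <vector>

std::string range_lock_target(const std::vector<std::string>& keys,   // sorted leaf keys
                              const std::string& fence_low,
                              const std::string& probe) {
    // first slot whose key is >= probe; the probe is known not to exist on the page
    auto it = std::lower_bound(keys.begin(), keys.end(), probe);
    if (it == keys.begin()) {
        return fence_low;     // no smaller record key: lock the fence-low key instead
    }
    return *(it - 1);         // the previous key's gap covers the probe
}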
rc_t bt_cursor_t::_advance_one_slot(btree_page_h &p, bool &eof) {
    w_assert1(p.is_fixed());
    w_assert1(_slot <= p.nrecs());

    if (_forward) {
        ++_slot;
    } else {
        --_slot;
    }
    eof = false;

    // keep following the next page.
    // because we might see empty pages to skip consecutively!
    while (true) {
        bool time2move = _forward ? (_slot >= p.nrecs()) : _slot < 0;

        if (time2move) {
            // Move to right(left) sibling
            bool reached_end = _forward ? p.is_fence_high_supremum() : p.is_fence_low_infimum();
            if (reached_end) {
                eof = true;
                return RCOK;
            }
            // now, use fence keys to tell where the neighboring page exists
            w_keystr_t neighboring_fence;
            btree_impl::traverse_mode_t traverse_mode;
            bool only_low_fence_exact_match = false;
            if (_forward) {
                p.copy_fence_high_key(neighboring_fence);
                traverse_mode = btree_impl::t_fence_low_match;
                int d = _upper.compare(neighboring_fence);
                if (d < 0 || (d == 0 && !_upper_inclusive)) {
                    eof = true;
                    return RCOK;
                }
                if (d == 0 && _upper_inclusive) {
                    // we will check the next page, but the only
                    // possible matching is an entry with
                    // the low-fence..
                    only_low_fence_exact_match = true;
                }
            } else {
                // if we are going backwards, the current page had
                // low = [current-fence-low], high = [current-fence-high]
                // and the previous page should have
                // low = [?], high = [current-fence-low].
                p.copy_fence_low_key(neighboring_fence);
                // let's find a page which has this value as high-fence
                traverse_mode = btree_impl::t_fence_high_match;
                int d = _lower.compare(neighboring_fence);
                if (d >= 0) {
                    eof = true;
                    return RCOK;
                }
            }
            p.unfix();

            // take lock for the fence key
            if (_needs_lock) {
                lockid_t lid (_store, (const unsigned char*) neighboring_fence.buffer_as_keystr(),
                              neighboring_fence.get_length_as_keystr());
                okvl_mode lock_mode;
                if (only_low_fence_exact_match) {
                    lock_mode = _ex_lock ? ALL_X_GAP_N : ALL_S_GAP_N;
                } else {
                    lock_mode = _ex_lock ? ALL_X_GAP_X : ALL_S_GAP_S;
                }
                // we can unconditionally request lock because we already released latch
                W_DO(ss_m::lm->lock(lid.hash(), lock_mode, true, true, true));
            }

            // TODO this part should check if we find an exact match of fence keys.
            // because we unlatch above, it's possible to not find exact match.
            // in that case, we should change the traverse_mode to fence_contains and continue
            W_DO(btree_impl::_ux_traverse(_store, neighboring_fence, traverse_mode, LATCH_SH, p));
            _slot = _forward ? 0 : p.nrecs() - 1;
            _set_current_page(p);
            continue;
        }

        // take lock on the next key.
        // NOTE: until we get locks, we aren't sure the key really becomes
        // the next key. So, we use the temporary variable _tmp_next_key_buf.
        const okvl_mode *mode = NULL;
        {
            p.get_key(_slot, _tmp_next_key_buf);
            if (_forward) {
                int d = _tmp_next_key_buf.compare(_upper);
                if (d < 0) {
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else if (d == 0 && _upper_inclusive) {
                    mode = _ex_lock ? &ALL_X_GAP_N : &ALL_S_GAP_N;
                } else {
                    eof = true;
                    mode = &ALL_N_GAP_N;
                }
            } else {
                int d = _tmp_next_key_buf.compare(_lower);
                if (d > 0) {
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else if (d == 0 && _lower_inclusive) {
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else {
                    eof = true;
                    mode = _ex_lock ? &ALL_N_GAP_X : &ALL_N_GAP_S;
                }
            }
        }
        if (_needs_lock && !mode->is_empty()) {
            rc_t rc = btree_impl::_ux_lock_key(_store, p, _tmp_next_key_buf, LATCH_SH, *mode, false);
            if (rc.is_error()) {
                if (rc.err_num() == eLOCKRETRY) {
                    W_DO(_check_page_update(p));
                    continue;
                } else {
                    return rc;
                }
            }
        }
        // okay, now we are sure the _tmp_next_key_buf is the key we want to use
        _key = _tmp_next_key_buf;
        return RCOK; // found a record! (or eof)
    }
    return RCOK;
}
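// A minimal standalone sketch (not the storage manager's API) of the lock-mode choice in
// the forward-scan branch above, assuming simplified mode names: a key strictly inside the
// range needs key+gap protection, a key equal to an inclusive upper bound needs only the
// key lock (its trailing gap lies outside the range), and a key past the bound needs no
// lock at all and ends the scan.
#include <string>

enum class ScanLock { KeyAndGap, KeyOnly, None };

ScanLock forward_scan_lock(const std::string& next_key,
                           const std::string& upper,
                           bool upper_inclusive,
                           bool& eof) {
    eof = false;
    if (next_key < upper) {
        return ScanLock::KeyAndGap;   // corresponds to ALL_S_GAP_S / ALL_X_GAP_X above
    }
    if (next_key == upper && upper_inclusive) {
        return ScanLock::KeyOnly;     // corresponds to ALL_S_GAP_N / ALL_X_GAP_N above
    }
    eof = true;
    return ScanLock::None;            // corresponds to ALL_N_GAP_N: nothing left to protect
}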