Ejemplo n.º 1
0
rc_t btree_impl::_ux_adopt_foster_core (btree_page_h &parent, btree_page_h &child,
    const w_keystr_t &new_child_key)
{
    w_assert1 (g_xct()->is_single_log_sys_xct());
    w_assert1 (parent.is_fixed());
    w_assert1 (parent.latch_mode() == LATCH_EX);
    w_assert1 (parent.is_node());
    w_assert1 (child.is_fixed());
    w_assert1 (child.latch_mode() == LATCH_EX);
    w_assert0 (child.get_foster() != 0);

    PageID new_child_pid = child.get_foster();
    if (smlevel_0::bf->is_swizzled_pointer(new_child_pid)) {
        smlevel_0::bf->unswizzle(parent.get_generic_page(),
                GeneralRecordIds::FOSTER_CHILD, true, &new_child_pid);
    }
    w_assert1(!smlevel_0::bf->is_swizzled_pointer(new_child_pid));

    lsn_t child_emlsn = child.get_foster_emlsn();
    W_DO(log_btree_foster_adopt (parent, child, new_child_pid, child_emlsn, new_child_key));
    _ux_adopt_foster_apply_parent (parent, new_child_pid, child_emlsn, new_child_key);
    _ux_adopt_foster_apply_child (child);

    // Switch parent of newly adopted child
    // CS TODO: I'm not sure we can do this because we don't hold a latch on new_child_pid
    smlevel_0::bf->switch_parent(new_child_pid, parent.get_generic_page());

    w_assert3(parent.is_consistent(true, true));
    w_assert3(child.is_consistent(true, true));
    return RCOK;
}
Ejemplo n.º 2
0
rc_t btree_impl::_sx_adopt_foster_all_core (
    btree_page_h &parent, bool is_root, bool recursive)
{
    // TODO this should use the improved tree-walk-through
    // See jira ticket:60 "Tree walk-through without more than 2 pages latched" (originally trac ticket:62)
    w_assert1 (xct()->is_sys_xct());
    w_assert1 (parent.is_fixed());
    w_assert1 (parent.latch_mode() == LATCH_EX);
    if (parent.is_node()) {
        w_assert1(parent.pid0());
        W_DO(_sx_adopt_foster_sweep(parent));
        if (recursive) {
            // also adopt at all children recursively
            for (int i = -1; i < parent.nrecs(); ++i) {
                btree_page_h child;
                PageID shpid_opaqueptr = i == -1 ? parent.get_foster_opaqueptr() : parent.child_opaqueptr(i);
                W_DO(child.fix_nonroot(parent, shpid_opaqueptr, LATCH_EX));
                W_DO(_sx_adopt_foster_all_core(child, false, true));
            }
        }

    }
    // after all adopts, if this parent is the root and has foster,
    // let's grow the tree
    if  (is_root && parent.get_foster()) {
        W_DO(_sx_grow_tree(parent));
        W_DO(_sx_adopt_foster_sweep(parent));
    }
    w_assert3(parent.is_consistent(true, true));
    return RCOK;
}
Ejemplo n.º 3
0
rc_t btree_impl::_ux_assure_fence_low_entry(btree_page_h &leaf)
{
    w_assert1(leaf.is_fixed());
    w_assert1(leaf.latch_mode() == LATCH_EX);
    if (!leaf.is_leaf()) {
        // locks are taken only for leaf-page entries. this case isn't an issue
        return RCOK;
    }
    w_keystr_t fence_low;
    leaf.copy_fence_low_key(fence_low);
    bool needs_to_create = false;
    if (leaf.nrecs() == 0) {
        if (leaf.compare_with_fence_high(fence_low) == 0) {
            // low==high happens only during page split. In that case, no one can have a lock
            // in the page being created. No need to assure the record.
            return RCOK;
        }
        needs_to_create = true;
    } else {
        w_keystr_t first_key;
        leaf.get_key(0, first_key);
        w_assert1(fence_low.compare(first_key) <= 0); // can't be fence_low>first_key
        if (fence_low.compare(first_key) < 0) {
            // fence-low doesn't exist as an entry!
            needs_to_create = true;
        }
    }
    if (needs_to_create) {
        W_DO(_sx_reserve_ghost(leaf, fence_low, 0)); // no data is needed
    }
    return RCOK;
}
Ejemplo n.º 4
0
rc_t btree_impl::_sx_opportunistic_adopt_foster (btree_page_h &parent,
                                                      btree_page_h &child, bool &pushedup,
                                                      const bool from_recovery)
{
    w_assert1 (parent.is_fixed());
    w_assert1 (parent.is_node());
    w_assert1 (child.is_fixed());

    pushedup = false;
    // let's try upgrading parent to EX latch. This highly likely fails in high-load situation,
    // so let's do it here to avoid system transaction creation cost.
    // we start from parent because EX latch on child is assured to be available in this order
    if (!parent.upgrade_latch_conditional()) {
        DBGOUT1(<< "opportunistic_adopt gave it up because of parent. "
                << parent.pid() << ". do nothing.");
        increase_ex_need(parent.pid()); // give a hint to subsequent accesses
        return RCOK;
    }
Ejemplo n.º 5
0
rc_t btree_impl::_ux_norec_alloc_core(btree_page_h &page, PageID &new_page_id) {
    // This is called only in REDO-only SSX, so no compensation logging. Just apply.
    w_assert1 (xct()->is_single_log_sys_xct());
    w_assert1 (page.latch_mode() == LATCH_EX);

    W_DO(smlevel_0::vol->alloc_a_page(new_page_id));
    btree_page_h new_page;
    w_rc_t rc;
    rc = new_page.fix_nonroot(page, new_page_id, LATCH_EX, false, true);

    if (rc.is_error()) {
        // if failed for any reason, we release the allocated page.
        W_DO(smlevel_0::vol ->deallocate_page(new_page_id));
        return rc;
    }

    // The new page has an empty key range; parent's high to high.
    w_keystr_t fence, chain_high;
    page.copy_fence_high_key(fence);
    bool was_right_most = (page.get_chain_fence_high_length() == 0);
    page.copy_chain_fence_high_key(chain_high);
    if (was_right_most) {
        // this means there was no chain or the page was the right-most of it.
        // (so its high=high of chain)
        // upon the first foster split, we start setting the chain-high.
        page.copy_fence_high_key(chain_high);
    }

#if W_DEBUG_LEVEL >= 3
    lsn_t old_lsn = page.get_page_lsn();
#endif //W_DEBUG_LEVEL

    W_DO(log_btree_norec_alloc(page, new_page, new_page_id, fence, chain_high));
    DBGOUT3(<< "btree_impl::_ux_norec_alloc_core, fence=" << fence << ", old-LSN="
        << old_lsn << ", new-LSN=" << page.get_page_lsn() << ", PID=" << new_page_id);

    // initialize as an empty child:
    new_page.format_steal(page.get_page_lsn(), new_page_id, page.store(),
                          page.root(), page.level(), 0, lsn_t::null,
                          page.get_foster_opaqueptr(), page.get_foster_emlsn(),
                          fence, fence, chain_high, false);
    page.accept_empty_child(page.get_page_lsn(), new_page_id, false /*not from redo*/);

    // in this operation, the log contains everything we need to recover without any
    // write-order-dependency. So, no registration for WOD.
    w_assert3(new_page.is_consistent(true, true));
    w_assert1(new_page.is_fixed());
    w_assert1(new_page.latch_mode() == LATCH_EX);

    w_assert3(page.is_consistent(true, true));
    w_assert1(page.is_fixed());
    return RCOK;
}
Ejemplo n.º 6
0
rc_t bt_cursor_t::_advance_one_slot(btree_page_h &p, bool &eof)
{
    w_assert1(p.is_fixed());
    w_assert1(_slot <= p.nrecs());

    if(_forward) {
        ++_slot;
    } else {
        --_slot;
    }
    eof = false;

    // keep following the next page.
    // because we might see empty pages to skip consecutively!
    while (true) {
        bool time2move = _forward ? (_slot >= p.nrecs()) : _slot < 0;

        if (time2move) {
            //  Move to right(left) sibling
            bool reached_end = _forward ? p.is_fence_high_supremum() : p.is_fence_low_infimum();
            if (reached_end) {
                eof = true;
                return RCOK;
            }
            // now, use fence keys to tell where the neighboring page exists
            w_keystr_t neighboring_fence;
            btree_impl::traverse_mode_t traverse_mode;
            bool only_low_fence_exact_match = false;
            if (_forward) {
                p.copy_fence_high_key(neighboring_fence);
                traverse_mode = btree_impl::t_fence_low_match;
                int d = _upper.compare(neighboring_fence);
                if (d < 0 || (d == 0 && !_upper_inclusive)) {
                    eof = true;
                    return RCOK;
                }
                if (d == 0 && _upper_inclusive) {
                    // we will check the next page, but the only
                    // possible matching is an entry with
                    // the low-fence..
                    only_low_fence_exact_match = true;
                }
            } else {
                // if we are going backwards, the current page had
                // low = [current-fence-low], high = [current-fence-high]
                // and the previous page should have
                // low = [?], high = [current-fence-low].
                p.copy_fence_low_key(neighboring_fence);
                // let's find a page which has this value as high-fence
                traverse_mode = btree_impl::t_fence_high_match;
                int d = _lower.compare(neighboring_fence);
                if (d >= 0) {
                    eof = true;
                    return RCOK;
                }
            }
            p.unfix();

            // take lock for the fence key
            if (_needs_lock) {
                lockid_t lid (_store, (const unsigned char*) neighboring_fence.buffer_as_keystr(), neighboring_fence.get_length_as_keystr());
                okvl_mode lock_mode;
                if (only_low_fence_exact_match) {
                    lock_mode = _ex_lock ? ALL_X_GAP_N: ALL_S_GAP_N;
                } else {
                    lock_mode = _ex_lock ? ALL_X_GAP_X : ALL_S_GAP_S;
                }
                // we can unconditionally request lock because we already released latch
                W_DO(ss_m::lm->lock(lid.hash(), lock_mode, true, true, true));
            }

            // TODO this part should check if we find an exact match of fence keys.
            // because we unlatch above, it's possible to not find exact match.
            // in that case, we should change the traverse_mode to fence_contains and continue
            W_DO(btree_impl::_ux_traverse(_store, neighboring_fence, traverse_mode, LATCH_SH, p));
            _slot = _forward ? 0 : p.nrecs() - 1;
            _set_current_page(p);
            continue;
        }

        // take lock on the next key.
        // NOTE: until we get locks, we aren't sure the key really becomes
        // the next key. So, we use the temporary variable _tmp_next_key_buf.
        const okvl_mode *mode = NULL;
        {
            p.get_key(_slot, _tmp_next_key_buf);
            if (_forward) {
                int d = _tmp_next_key_buf.compare(_upper);
                if (d < 0) {
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else if (d == 0 && _upper_inclusive) {
                    mode = _ex_lock ? &ALL_X_GAP_N : &ALL_S_GAP_N;
                } else {
                    eof = true;
                    mode = &ALL_N_GAP_N;
                }
            } else {
                int d = _tmp_next_key_buf.compare(_lower);
                if (d > 0) {
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else if (d == 0 && _lower_inclusive) {
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else {
                    eof = true;
                    mode = _ex_lock ? &ALL_N_GAP_X : &ALL_N_GAP_S;
                }
            }
        }
        if (_needs_lock && !mode->is_empty()) {
            rc_t rc = btree_impl::_ux_lock_key (_store, p, _tmp_next_key_buf,
                    LATCH_SH, *mode, false);
            if (rc.is_error()) {
                if (rc.err_num() == eLOCKRETRY) {
                    W_DO(_check_page_update(p));
                    continue;
                } else {
                    return rc;
                }
            }
        }
        // okay, now we are sure the _tmp_next_key_buf is the key we want to use
        _key = _tmp_next_key_buf;
        return RCOK; // found a record! (or eof)
    }
    return RCOK;
}