Ejemplo n.º 1
0
/**
 * Splits \a page (via _sx_split_foster) when \a new_key should not simply be
 * inserted into it, and afterwards makes \a page refer to whichever page's
 * fence keys contain \a new_key.
 *
 * A split is requested when either
 *  - the page reports no space for inserting \a new_key, or
 *  - the page reports an extremely right-skewed insertion pattern and a
 *    chance for a no-record split with \a new_key.
 *
 * @param page     in/out page handle. If, after the split, its fences no
 *                 longer contain \a new_key, the handle is unfixed and
 *                 replaced by the new page, fixed with LATCH_EX.
 * @param new_key  the key that is about to be inserted.
 * @return RCOK when no split is needed or the split succeeded; a failure of
 *         the split or of fixing the new page is handed back through W_DO.
 */
rc_t btree_impl::_sx_split_if_needed (btree_page_h &page, const w_keystr_t &new_key) {
    if (page.check_space_for_insert_node(new_key)) {
        // There is room for the key; the only remaining reason to split is
        // the skewed-insertion / no-record-split case.
        const bool skewed_split =
            page.is_insertion_extremely_skewed_right()
            && page.check_chance_for_norecord_split(new_key);
        if (!skewed_split) {
            return RCOK; // nothing to do
        }
    }

    // We are running in a user transaction, so simply call the SSX split.
    PageID split_page_id;
    W_DO(_sx_split_foster(page, split_page_id, new_key));

    if (page.fence_contains(new_key)) {
        // The original page still covers the key; keep using it.
        return RCOK;
    }

    // The key now belongs to the page created by the split: switch the
    // caller's handle over to it.
    btree_page_h split_page;
    W_DO(split_page.fix_nonroot(page, split_page_id, LATCH_EX));
    w_assert1(split_page.fence_contains(new_key));
    page.unfix();
    page = split_page;
    return RCOK;
}
Ejemplo n.º 2
0
/**
 * Advances the cursor by one slot in the scan direction (_forward), hopping
 * to neighboring pages as needed, and locks the key it lands on.
 *
 * The loop repeats for three reasons: the slot ran off the current page (so a
 * neighbor page must be located through its fence key), the neighbor turned
 * out to be empty, or the key lock attempt returned eLOCKRETRY.
 *
 * @param p    page the cursor currently sits on; must be fixed on entry. It
 *             may be unfixed and re-pointed to another page by this call.
 * @param eof  set to false on entry to the logic, and to true when the scan
 *             runs past the requested bounds or past the end of the key
 *             space in the scan direction.
 * @return RCOK on success (including the eof case); errors from locking,
 *         traversal or _check_page_update are handed back (through W_DO or
 *         directly).
 */
rc_t bt_cursor_t::_advance_one_slot(btree_page_h &p, bool &eof)
{
    w_assert1(p.is_fixed());
    w_assert1(_slot <= p.nrecs());

    // step the slot index one position in the scan direction
    if(_forward) {
        ++_slot;
    } else {
        --_slot;
    }
    eof = false;

    // keep following the next page.
    // because we might see empty pages to skip consecutively!
    while (true) {
        // true when _slot has run off the end (forward) or the beginning
        // (backward) of the current page
        bool time2move = _forward ? (_slot >= p.nrecs()) : _slot < 0;

        if (time2move) {
            //  Move to right(left) sibling
            // The fence in the scan direction is the supremum/infimum:
            // presumably there is no further page, so the scan is over.
            bool reached_end = _forward ? p.is_fence_high_supremum() : p.is_fence_low_infimum();
            if (reached_end) {
                eof = true;
                return RCOK;
            }
            // now, use fence keys to tell where the neighboring page exists
            w_keystr_t neighboring_fence;
            btree_impl::traverse_mode_t traverse_mode;
            bool only_low_fence_exact_match = false;
            if (_forward) {
                p.copy_fence_high_key(neighboring_fence);
                traverse_mode = btree_impl::t_fence_low_match;
                // stop here if the upper bound lies below the high fence, or
                // equals it while the upper bound is exclusive
                int d = _upper.compare(neighboring_fence);
                if (d < 0 || (d == 0 && !_upper_inclusive)) {
                    eof = true;
                    return RCOK;
                }
                if (d == 0 && _upper_inclusive) {
                    // we will check the next page, but the only
                    // possible matching is an entry with
                    // the low-fence..
                    only_low_fence_exact_match = true;
                }
            } else {
                // if we are going backwards, the current page had
                // low = [current-fence-low], high = [current-fence-high]
                // and the previous page should have
                // low = [?], high = [current-fence-low].
                p.copy_fence_low_key(neighboring_fence);
                // let's find a page which has this value as high-fence
                traverse_mode = btree_impl::t_fence_high_match;
                // stop here if the lower bound is at or above the low fence
                int d = _lower.compare(neighboring_fence);
                if (d >= 0) {
                    eof = true;
                    return RCOK;
                }
            }
            // release the current page before requesting the lock below
            p.unfix();

            // take lock for the fence key
            if (_needs_lock) {
                lockid_t lid (_store, (const unsigned char*) neighboring_fence.buffer_as_keystr(), neighboring_fence.get_length_as_keystr());
                okvl_mode lock_mode;
                if (only_low_fence_exact_match) {
                    // only the fence key itself can still match, so the
                    // *_GAP_N modes are requested
                    lock_mode = _ex_lock ? ALL_X_GAP_N: ALL_S_GAP_N;
                } else {
                    lock_mode = _ex_lock ? ALL_X_GAP_X : ALL_S_GAP_S;
                }
                // we can unconditionally request lock because we already released latch
                W_DO(ss_m::lm->lock(lid.hash(), lock_mode, true, true, true));
            }

            // TODO this part should check if we find an exact match of fence keys.
            // because we unlatch above, it's possible to not find exact match.
            // in that case, we should change the traverse_mode to fence_contains and continue
            W_DO(btree_impl::_ux_traverse(_store, neighboring_fence, traverse_mode, LATCH_SH, p));
            // start at the first (forward) or last (backward) slot of the
            // page just fixed; if that page is empty, the next iteration
            // moves on again
            _slot = _forward ? 0 : p.nrecs() - 1;
            _set_current_page(p);
            continue;
        }

        // take lock on the next key.
        // NOTE: until we get locks, we aren't sure the key really becomes
        // the next key. So, we use the temporary variable _tmp_next_key_buf.
        const okvl_mode *mode = NULL;
        {
            p.get_key(_slot, _tmp_next_key_buf);
            if (_forward) {
                int d = _tmp_next_key_buf.compare(_upper);
                if (d < 0) {
                    // strictly below the upper bound
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else if (d == 0 && _upper_inclusive) {
                    // exactly the inclusive upper bound
                    mode = _ex_lock ? &ALL_X_GAP_N : &ALL_S_GAP_N;
                } else {
                    // past the upper bound: report eof; the N/N mode is
                    // expected to be skipped by the is_empty() check below
                    eof = true;
                    mode = &ALL_N_GAP_N;
                }
            } else {
                int d = _tmp_next_key_buf.compare(_lower);
                if (d > 0) {
                    // strictly above the lower bound
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else if (d == 0 && _lower_inclusive) {
                    // exactly the inclusive lower bound; same mode as above
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                } else {
                    // past the lower bound: report eof, but still request
                    // the ALL_N_GAP_X / ALL_N_GAP_S mode for this key
                    eof = true;
                    mode = _ex_lock ? &ALL_N_GAP_X : &ALL_N_GAP_S;
                }
            }
        }
        if (_needs_lock && !mode->is_empty()) {
            rc_t rc = btree_impl::_ux_lock_key (_store, p, _tmp_next_key_buf,
                    LATCH_SH, *mode, false);
            if (rc.is_error()) {
                if (rc.err_num() == eLOCKRETRY) {
                    // the lock attempt asked for a retry: re-validate the
                    // page state, then run the loop again
                    W_DO(_check_page_update(p));
                    continue;
                } else {
                    return rc;
                }
            }
        }
        // okay, now we are sure the _tmp_next_key_buf is the key we want to use
        // (note: _key is also overwritten when eof was set above)
        _key = _tmp_next_key_buf;
        return RCOK; // found a record! (or eof)
    }
    // not reached: the loop above only exits through return statements
    return RCOK;
}
Ejemplo n.º 3
0
/**
 * Locks (or, with \a check_only, merely checks) the given key while the
 * caller holds a latch on \a leaf.
 *
 * Callers:
 *  1. Top level _ux_lock_key() - I/U/D and search operations; lock conflict
 *     is possible.
 *  2. _ux_lock_range() - lock conflict is possible.
 *
 * Lock conflicts come in two flavors:
 *  1. Deadlock - the requested lock is held by another transaction and the
 *     current transaction already holds other locks: fail.
 *  2. Timeout  - the requested lock is held by another transaction but the
 *     current transaction holds no other locks: okay to retry.
 *
 * For restart operation using lock re-acquisition:
 *  1. On-demand or mixed UNDO - a lock conflict triggers UNDO transaction
 *     rollback. This is a blocking operation, meaning other concurrent
 *     transactions asking for the same lock are blocked; no deadlock.
 *  2. Traditional UNDO - original behavior: either a deadlock error or a
 *     timeout followed by a retry.
 *
 * @param store       store the key belongs to.
 * @param leaf        latched page; it is unfixed and re-fixed when the lock
 *                    cannot be obtained immediately.
 * @param keystr      key bytes used to build the lock id.
 * @param keylen      number of bytes in \a keystr.
 * @param latch_mode  latch mode used when re-fixing \a leaf after waiting.
 * @param lock_mode   requested lock mode.
 * @param check_only  when true, the lock is checked but not acquired.
 * @return RCOK on success; the eDEADLOCK result of the conditional request;
 *         the error from re-fixing the page; RC(eLOCKRETRY) when the page LSN
 *         changed while the latch was released; or whatever retry_lock()
 *         fails with (through W_DO).
 */
rc_t
btree_impl::_ux_lock_key(
    const StoreID&      store,
    btree_page_h&      leaf,
    const void*        keystr,
    size_t             keylen,
    latch_mode_t       latch_mode,
    const okvl_mode&   lock_mode,
    bool               check_only
    )
{
    const bool acquire = !check_only;

    lockid_t lock_id (store, static_cast<const unsigned char*>(keystr), keylen);
    // Filled in by the conditional request below; the inserted lock entry is
    // reused by the blocking retry even though the first attempt failed.
    RawLock* lock_entry = nullptr;

    // Step 1: conditional request while still holding the latch.
    //  - If the lock cannot be acquired and the transaction holds no other
    //    locks, lock() returns immediately with eCONDLOCKTIMEOUT: no deadlock
    //    worry, and the lock entry has already been created. We then release
    //    the latch and try again with retry_lock(), which blocks; it is safe
    //    to retry forever without risking deadlock.
    //  - If lock() returns eDEADLOCK, acquisition failed while the
    //    transaction already holds other locks. Retrying is not safe (it
    //    would cause further deadlocks), so the caller must abort the
    //    current transaction.
    rc_t cond_rc = lm->lock(lock_id.hash(), lock_mode,
            true /* check */, false /* wait */, acquire,
            smthread_t::xct(), timeout_t::WAIT_IMMEDIATE, &lock_entry);

    if (!cond_rc.is_error()) {
        // lucky! we got it immediately.
        return RCOK;
    }

    if (cond_rc.err_num() == eDEADLOCK) {
        // Deadlock with this transaction chosen as the victim: give up, do
        // not retry. The user transaction aborts and rolls back itself upon
        // deadlock detection. Because Express does not have a deadlock
        // monitor and policy to determine which transaction to roll back
        // (it should abort the cheaper one), the transaction that detects
        // the deadlock is the one aborted.
        w_assert1(lock_entry == nullptr);
        return cond_rc;
    }

    // Step 2: could not get it immediately, so unlatch the page and wait.
    w_assert1(cond_rc.err_num() == eCONDLOCKTIMEOUT);
    w_assert1(lock_entry != nullptr);

    // Bump the pin count before releasing the latch so the page cannot be
    // evicted meanwhile; the holder drops the pin automatically.
    pin_for_refix_holder refix_pin(leaf.pin_for_refix());
    // Remembered to detect modifications made while the page is unlatched.
    lsn_t lsn_before_unlatch = leaf.get_page_lsn();
    leaf.unfix();

    // Unconditional request (this will block).
    W_DO(lm->retry_lock(&lock_entry, acquire));

    // Step 3: we have the lock, but the page may have changed meanwhile.
    w_rc_t refix_rc = leaf.refix_direct(refix_pin._idx, latch_mode);
    const bool refix_failed = refix_rc.is_error();
    const bool must_back_out =
        refix_failed || leaf.get_page_lsn() != lsn_before_unlatch;
    if (!must_back_out) {
        return RCOK;
    }

    // Back out: release the lock we just acquired (if any).
    if (lock_entry == nullptr) {
        w_assert1(check_only);
    } else {
        w_assert1(!check_only);
        lm->unlock(lock_entry);
    }

    if (refix_failed) {
        return refix_rc;
    }

    // Unluckily, the page was modified while unlatched: ask for a retry.
    w_assert1(leaf.get_page_lsn() != lsn_before_unlatch);
    return RC(eLOCKRETRY);
}