Ejemplo n.º 1
0
w_rc_t                        
bf_prefetch_thread_t::fetch(
    const lpid_t&       pid,
    page_p&                page                
) 
{
    FUNC(bf_prefetch_thread_t::fetch);
    bool got;
    latch_mode_t mode;

    DBGTHRD(<<"fetching -- awaiting mutex...");
    CRITICAL_SECTION(cs, _prefetch_mutex);

    if(get_error()) {
        w_rc_t rc(_fix_error.delegate());
        return rc;
    }
    int i = _f; 
    bf_prefetch_thread_t::frame_info &inf = _info[i];

    w_assert3(inf._status != pf_init);
    // caller must have requested it

    mode = inf._mode;

    if(inf._pid == pid && inf._status == pf_available ) {
        page = inf._page; // refixes
        got = true;
    } else {
        w_assert3(inf._status == pf_requested ||
            inf._status == pf_in_transit ||
            inf._status == pf_grabbed 
            );

        got = false;
    }
    new_state(i,pf_fetch);
    w_assert3( inf._status == pf_init || inf._status == pf_grabbed);

    cs.exit();

    DBGTHRD(<<"fetching -- released mutex...");
    w_rc_t rc;
    if(!got) {
        // Just go ahead and fix it here.
        // If _status is pf_in_transit, we should
        // block in the buffer manger; if it's 
        // pf_requested, the thread hasn't run yet (which
        // really shouldn't be the case if we're prefetching
        // at most 1 at a time).

        DBGTHRD(<<"did not get -- fixing page...");
        smlevel_0::store_flag_t store_flags = smlevel_0::st_bad;
        rc = page.fix(pid, page_p::t_any_p, mode, 0, store_flags);
        if(rc.is_error()) {
            CRITICAL_SECTION(cs2, _prefetch_mutex);
            _fix_error = rc;
            new_state(i, pf_error);
        }
    }
Ejemplo n.º 2
0
void 
sortorder::Ibyteorder(int permutation[4]) 
{
    /* The following magic constant has the representation
     * 0x3f404142 on a BIGLONG machine.
     */
    int magic = 0x3f404142;
    u_char *p = (u_char *)&magic;
    int i;
    for (i=0;i<4;i++)
        permutation[i] = p[i] - 0x3f;
#ifdef BIGLONG
    /* verify that the BIGLONG assertion is correct */
    for (i=0;i<4;i++) w_assert1(permutation[i] == i);

   w_assert3(w_base_t::is_big_endian()); 
#else
#if W_DEBUG_LEVEL > 2
    // Make sure lexify agrees with w_base_t
    if(permutation[1] == 1) {
       w_assert3(w_base_t::is_big_endian()); 
    } else {
       w_assert3(w_base_t::is_little_endian()); 
    }
#endif 
#endif
}
Ejemplo n.º 3
0
/*
 * Fill permutation[0..size-1] with the byte reordering for an integer
 * that is `size` bytes long: the identity on a big-endian host, the
 * byte reversal on a little-endian host.  Applying the reversal twice
 * restores the original order, so the same table converts in both
 * directions.
 *
 * This does not work on systems with "twisted" byte orders, where
 * different types use different byte orders.  Such a system would
 * break numerous other assumptions throughout the system anyway.
 */
void sortorder::Ibyteorder(int *permutation, int size) 
{
#if W_DEBUG_LEVEL > 2
    /*
     * XXX Paranoia, overly so.  w_base_t should be paranoid.
     * Probe the byte order locally and check that the base class
     * reports the same thing.
     */
    int2_t  probe = 0x1234;
    bool    first_byte_is_high = (((uint1_t *)&probe)[0] == 0x12);

    if (first_byte_is_high) {
        w_assert3(w_base_t::is_big_endian()); 
    } else {
        w_assert3(w_base_t::is_little_endian()); 
    }
#endif

#ifdef BIGLONG
#error    "BIGLONG not supported"
#endif

    const bool keep_order = w_base_t::is_big_endian();
    for (int pos = 0; pos < size; ++pos) {
        permutation[pos] = keep_order ? pos : (size - pos - 1);
    }
}
Ejemplo n.º 4
0
/*
 * Core of a foster adoption: the page that `child` currently points to as
 * its foster child is logged and then applied as adopted by `parent`
 * under separator key `new_child_key`.
 *
 * Asserted preconditions: the current transaction is a single-log system
 * transaction, both pages are fixed with EX latches, `parent` is an
 * interior node, and `child` has a foster pointer.
 *
 * Returns the error from logging if that fails, RCOK otherwise.
 */
rc_t btree_impl::_ux_adopt_foster_core (btree_page_h &parent, btree_page_h &child,
    const w_keystr_t &new_child_key)
{
    w_assert1 (g_xct()->is_single_log_sys_xct());

    w_assert1 (parent.is_fixed());
    w_assert1 (parent.latch_mode() == LATCH_EX);
    w_assert1 (parent.is_node());

    w_assert1 (child.is_fixed());
    w_assert1 (child.latch_mode() == LATCH_EX);
    w_assert0 (child.get_foster() != 0);

    // The log record and the parent must carry a plain (unswizzled) page id.
    // NOTE(review): the pointer is read from `child`, but unswizzle() is
    // handed the parent's generic page -- confirm this is intended.
    PageID adoptee_pid = child.get_foster();
    if (smlevel_0::bf->is_swizzled_pointer(adoptee_pid)) {
        smlevel_0::bf->unswizzle(parent.get_generic_page(),
                GeneralRecordIds::FOSTER_CHILD, true, &adoptee_pid);
    }
    w_assert1(!smlevel_0::bf->is_swizzled_pointer(adoptee_pid));

    lsn_t adoptee_emlsn = child.get_foster_emlsn();

    // Log first, then apply the change to both pages.
    W_DO(log_btree_foster_adopt (parent, child, adoptee_pid, adoptee_emlsn, new_child_key));
    _ux_adopt_foster_apply_parent (parent, adoptee_pid, adoptee_emlsn, new_child_key);
    _ux_adopt_foster_apply_child (child);

    // Tell the buffer pool that the adopted page now hangs off `parent`.
    // CS TODO: I'm not sure we can do this because we don't hold a latch on
    // the adopted page.
    smlevel_0::bf->switch_parent(adoptee_pid, parent.get_generic_page());

    w_assert3(parent.is_consistent(true, true));
    w_assert3(child.is_consistent(true, true));
    return RCOK;
}
Ejemplo n.º 5
0
/*
 * Allocate a new, empty page and link it to `page` through
 * accept_empty_child().  The id of the allocated page is returned through
 * `new_page_id`.
 *
 * Runs only inside a REDO-only single-log system transaction, so there is
 * no compensation logging: the change is logged and applied directly.
 * If the freshly allocated page cannot be fixed, it is deallocated again
 * and the fix error is returned.
 */
rc_t btree_impl::_ux_norec_alloc_core(btree_page_h &page, PageID &new_page_id) {
    w_assert1 (xct()->is_single_log_sys_xct());
    w_assert1 (page.latch_mode() == LATCH_EX);

    W_DO(smlevel_0::vol->alloc_a_page(new_page_id));

    btree_page_h new_page;
    w_rc_t fix_rc = new_page.fix_nonroot(page, new_page_id, LATCH_EX, false, true);
    if (fix_rc.is_error()) {
        // Whatever went wrong, give the page we just allocated back.
        W_DO(smlevel_0::vol ->deallocate_page(new_page_id));
        return fix_rc;
    }

    // The new page covers an empty key range: [high fence of page, same key].
    w_keystr_t fence, chain_high;
    page.copy_fence_high_key(fence);

    const bool was_right_most = (page.get_chain_fence_high_length() == 0);
    page.copy_chain_fence_high_key(chain_high);
    if (was_right_most) {
        // No foster chain yet, or `page` was its right-most member, so the
        // chain's high key equals the page's own high fence.  The chain-high
        // key starts being recorded at the first foster split.
        page.copy_fence_high_key(chain_high);
    }

#if W_DEBUG_LEVEL >= 3
    lsn_t lsn_before = page.get_page_lsn();
#endif //W_DEBUG_LEVEL

    W_DO(log_btree_norec_alloc(page, new_page, new_page_id, fence, chain_high));
    DBGOUT3(<< "btree_impl::_ux_norec_alloc_core, fence=" << fence << ", old-LSN="
        << lsn_before << ", new-LSN=" << page.get_page_lsn() << ", PID=" << new_page_id);

    // Format the new page as an empty page, then let `page` accept it.
    new_page.format_steal(page.get_page_lsn(), new_page_id, page.store(),
                          page.root(), page.level(), 0, lsn_t::null,
                          page.get_foster_opaqueptr(), page.get_foster_emlsn(),
                          fence, fence, chain_high, false);
    page.accept_empty_child(page.get_page_lsn(), new_page_id, false /*not from redo*/);

    // The log record alone is enough for recovery, so there is no
    // write-order dependency to register.
    w_assert3(new_page.is_consistent(true, true));
    w_assert1(new_page.is_fixed());
    w_assert1(new_page.latch_mode() == LATCH_EX);

    w_assert3(page.is_consistent(true, true));
    w_assert1(page.is_fixed());
    return RCOK;
}
Ejemplo n.º 6
0
void
bf_prefetch_thread_t::new_state(int i, prefetch_event_t e)
{
    FUNC(bf_prefetch_thread_t::new_state);
    // ASSUMES CALLER HAS THE MUTEX

    prefetch_status_t        nw;
    prefetch_status_t        old;
    bf_prefetch_thread_t::frame_info &inf = _info[i];
    old = inf._status;
    if( (nw = _table[old][e]) == pf_fatal) {
        std::cerr << "Bad transition for state " << int(old)
                << " and event " << int(e)
                <<std::endl;
        W_FATAL(fcINTERNAL);
    }
    DBGTHRD(<< " change : _table[" << int(old) << "," << int(e)
        << "] ->" << int(nw));

    inf._status = nw;
    if(old != nw) {
        switch(nw) {
        case pf_failure:
            w_assert3(_fix_error.is_error());
            _fix_error_i = i;
            break;
        case pf_grabbed:
            w_assert3(_n == 2);
            DBGTHRD(<<"BUMPING INDEX from " << _f 
                << " to " << (1-_f)
                );
            _f = 1-_f;
            break;
        case pf_init:
            if(old != pf_failure) {
                inf._page.unfix();
            }
            break;
        case pf_available:
            // Must unfix because the fetching thread
            // cannot do so.
            inf._page.unfix();
            break;
        default:
            break;
        }
    }
}
Ejemplo n.º 7
0
/*
 * Ask the prefetch thread to fix page `pid` with latch mode `mode`.
 *
 * Under the prefetch mutex the request is recorded in the current frame
 * (index _f), which must be in state pf_init, and the frame is moved to
 * pf_requested.  The mutex is then released and the prefetch thread is
 * signalled through the _activate condition variable.
 *
 * If an error has already been recorded, it is handed back immediately and
 * no request is made.  Otherwise the current value of _fix_error is
 * returned.
 */
w_rc_t                        
bf_prefetch_thread_t::request(
    const lpid_t&       pid,
    latch_mode_t        mode
) 
{
    FUNC(bf_prefetch_thread_t::request);

    // The page MUST be latched, so "no latch" is not acceptable.
    w_assert3(mode != LATCH_NL);

    CRITICAL_SECTION(cs, _prefetch_mutex);
    if (get_error()) {
        w_rc_t rc(_fix_error.delegate());
        return rc;
    }

    int slot = _f; 
    bf_prefetch_thread_t::frame_info &frame = _info[slot];

    DBGTHRD(<<"request! i=" << slot
        << " pid " << pid
        << " mode " << int(mode)
        << " old status " << int(frame._status)
    );

    // There should always be a free frame -- at least when used with
    // scan.  TODO -- make more general.
    // This also means there is no frame that was read from disk
    // and never used (fetched).
    w_assert3(frame._status == pf_init);

    INC_TSTAT(bf_prefetch_requests);

    frame._pid  = pid;
    frame._mode = mode;
    new_state(slot, pf_request);
    w_assert3(frame._status == pf_requested);

    cs.exit();

    DBGTHRD(<< "released mutex; signalling...");
    DO_PTHREAD(pthread_cond_signal(&_activate));

    DBGTHRD(<< "returning from request");
    return _fix_error;
}
Ejemplo n.º 8
0
/*
 * Adopt the foster children below `parent`.
 *
 * For an interior node, the node's own children are swept first.  With
 * `recursive` set, each page reached from the node is then fixed with an
 * EX latch and processed the same way.  Finally, if `is_root` is set and
 * the page still has a foster pointer, the tree is grown and the page is
 * swept once more.
 *
 * Asserted preconditions: a system transaction is running, and `parent`
 * is fixed with an EX latch.
 */
rc_t btree_impl::_sx_adopt_foster_all_core (
    btree_page_h &parent, bool is_root, bool recursive)
{
    // TODO this should use the improved tree-walk-through
    // See jira ticket:60 "Tree walk-through without more than 2 pages latched" (originally trac ticket:62)
    w_assert1 (xct()->is_sys_xct());
    w_assert1 (parent.is_fixed());
    w_assert1 (parent.latch_mode() == LATCH_EX);

    if (parent.is_node()) {
        w_assert1(parent.pid0());
        W_DO(_sx_adopt_foster_sweep(parent));

        if (recursive) {
            // Descend into the pages below this node as well.
            // NOTE(review): slot -1 takes the page from get_foster_opaqueptr()
            // rather than asking child_opaqueptr() -- confirm this is intended.
            for (int slot = -1; slot < parent.nrecs(); ++slot) {
                btree_page_h child;
                PageID next_opaqueptr;
                if (slot == -1) {
                    next_opaqueptr = parent.get_foster_opaqueptr();
                } else {
                    next_opaqueptr = parent.child_opaqueptr(slot);
                }
                W_DO(child.fix_nonroot(parent, next_opaqueptr, LATCH_EX));
                W_DO(_sx_adopt_foster_all_core(child, false, true));
            }
        }
    }

    // After all adopts, a root that still has a foster page needs one
    // more level: grow the tree, then adopt once more.
    if (is_root && parent.get_foster()) {
        W_DO(_sx_grow_tree(parent));
        W_DO(_sx_adopt_foster_sweep(parent));
    }

    w_assert3(parent.is_consistent(true, true));
    return RCOK;
}
Ejemplo n.º 9
0
void
file_pg_stats_t::add(const file_pg_stats_t& stats)
{
    w_assert3(sizeof(*this) % sizeof(base_stat_t) == 0);
    for (uint i = 0; i < sizeof(*this)/sizeof(hdr_bs); i++) {
        ((base_stat_t*)this)[i] += ((base_stat_t*)&stats)[i];
    }
}
Ejemplo n.º 10
0
void
file_pg_stats_t::clear()
{
    w_assert3(sizeof(*this) % sizeof(base_stat_t) == 0);
    for (uint i = 0; i < sizeof(*this)/sizeof(base_stat_t); i++) {
        ((base_stat_t*)this)[i] = 0;
    }
}
Ejemplo n.º 11
0
void btree_page_data::remove_items(
                      const int item_count,    // In: Number of records to remove
                      const w_keystr_t &high)  // In: high fence after record removal
{
    // Use this function with caution

    // A special helper function to remove 'item_count' largest items from the storage
    // this function is only used by full logging page rebalance restart operation
    // to recover the source page after a system crash
    // the caller resets the fence keys on source page which eliminate some
    // of the records from source page
    // this function removes the largest 'item_count' items from the page
    // because they belong to destination page after the rebalance
    // After the removal, item count changed but no change to ghost count

    w_assert1(btree_level >= 1);
    w_assert1(nitems > item_count);          // Must have at least one record which is the fency key record
    w_assert3(_items_are_consistent());

    if ((0 == item_count) || (1 == nitems))  // If 1 == nitems, we only have a fence key record
        return;

    DBGOUT3( << "btree_page_data::reset_item_count - before deletion item count: " << nitems
             << ", new high fence key: " << high);

    int remaining = item_count;
    char* high_key_p = (char *)high.buffer_as_keystr();
    size_t high_key_length = (size_t)high.get_length_as_keystr();
    while (0 < remaining)
    {
        w_assert1(1 < nitems);
        // Find the records with key >= new high fence key and delete them
        int item_index = 1;  // Start with index 1 since 0 is for the fence key record
        uint16_t* key_length;;
        size_t item_len;

        int cmp;
        const int data_offset = sizeof(uint16_t);  // To skipover the portion which contains the size of variable data
        for (int i = item_index; i < nitems; ++i)
        {
            key_length = (uint16_t*)item_data(i);
            item_len = *key_length++;

            cmp = ::memcmp(high_key_p, item_data(i)+data_offset, (high_key_length<=item_len)? high_key_length : item_len);
            if ((0 > cmp) || ((0 == cmp) && (high_key_length <= item_len)))
            {
                // The item is larger than the new high fence key or the same as high fence key (high fence is ghost)
                DBGOUT3( << "btree_page_data::reset_item_count - delete record index: " << i);

                // Delete the item, which changes nitems but no change to nghosts
                // therefore break out the loop and start the loop again if we have more items to remove
                delete_item(i);
                break;
            }
        }

        --remaining;
    }
Ejemplo n.º 12
0
/*
 * Reset the item storage of this page to "empty": no items, no ghosts,
 * and first_used_body set to max_bodies.
 * Requires btree_level >= 1 (asserted).
 */
void btree_page_data::init_items() {
    w_assert1(btree_level >= 1);

    first_used_body = max_bodies;
    nghosts         = 0;
    nitems          = 0;

    w_assert3(_items_are_consistent());
}
Ejemplo n.º 13
0
/**\brief Detach this thread from transaction \a x (carries out a detach_xct()).
 *
 * \details
 * N threads point to 1 xct_t; an xct_t keeps no pointers back to its
 * threads because of that 1:N relationship.
 *
 * A thread holds cached information on behalf of a transaction, in
 * 3 structures.  Threads attach, detach and reattach, and several
 * threads may act for one xct at once, so these caches are recycled to
 * avoid the excess heap activity and cache repopulation that throwing
 * them away would cause:
 *
 * - When the thread/xct relationship is broken, the thread tries to stash
 *   its caches in the xct_t.
 * - The xct_t accepts them only if it has none stashed already.  If 3
 *   threads detach from the same xct in succession, the first thread's
 *   caches are stashed and the other 2 sets go back to the heap.
 * - A thread that later attaches to the xct takes the stashed caches.
 *   If those 3 threads reattach, the first one steals the caches back and
 *   the next two allocate from the heap.
 * - If the xct goes away instead, it deletes the stashed caches and
 *   returns them to the global heap.
 *
 * In addition to these 3 caches, the thread holds statistics for an
 * instrumented transaction; they are folded into the xct's statistics
 * here.
 */
void
smthread_t::no_xct(xct_t *x)
{
    w_assert3(x);
    w_assert3(x == tcb().xct || tcb().xct==NULL);

    /*
     * Collect summary statistics, but only while still attached to x.
     * After an earlier detach the per-thread values should already be 0,
     * so collecting again would be correct but needless work.
     */
    const bool still_attached = (tcb().xct == x);
    if (still_attached && x->is_instrumented()) 
    {
        // NOTE: thread-safety comes from the fact that this is called from
        // xct_impl::detach_thread, which first grabs the 1thread-at-a-time
        // mutex.
        /*
         * xct_stats refers to the __stats passed in on begin_xct() for an
         * instrumented transaction.  The per-thread stats are added to it
         * and the per-thread copy is then zeroed, so stats collected per
         * xct are not counted in the global stats.
         *
         * Note also that this is a non-atomic add.
         */
        sm_stats_info_t &xct_stats = x->stats_ref();
        xct_stats += TL_stats(); // sm_stats_info_t

        /* Now held by the xct, so clear the thread's copy. */
        tcb().clear_TL_stats();
    }

    /* See comments in smthread_t::new_xct() */
    DBG(<<"no_xct: id=" << me()->id);
    x->stash(
            tcb()._lock_hierarchy,
            tcb()._sdesc_cache,
            tcb()._xct_log);
}
Ejemplo n.º 14
0
/*
 * Construct the prefetch thread as a regular smthread named "bf_prefetch".
 *
 * `i` is presumably the number of pages that may be prefetched at once
 * (TODO confirm); only i == 1 is supported at the moment, which is checked
 * by a debug assertion.  The frame count _n is set to i+1 and passed to
 * _init().  The current frame index _f starts at 0 and the retire flag
 * starts cleared.
 */
NORET                        
bf_prefetch_thread_t::bf_prefetch_thread_t(int i) 
: smthread_t(t_regular, "bf_prefetch"),
  _fix_error_i(0),
  _n(i+1),
  _info(0),
  _f(0),
  _retire(false)
{
    FUNC(bf_prefetch_thread_t::bf_prefetch_thread_t);
    // that's all that's supported at the moment
    w_assert3(i==1);
    _init(_n);
}
Ejemplo n.º 15
0
/*
 * Scan the file, deleting corresponding entries from
 * the rtree index.  Probe, Delete the key/elem pr, re-probe,
 * re-insert, re-probe, re-delete, re-probe.
 * This tests insert/remove of null entries, for one thing.
 * The file given should be the original file if it still
 * exists, so that we can avoid deleting in sorted order.
 */
w_rc_t
delete_rtree_entries(
    stid_t idx,
    stid_t fid,
    smsize_t keyoffset
)
{
    char    stringbuffer[MAXBV];
    scan_file_i  scanf(fid, ss_m::t_cc_file);

    bool    nullfound=false;
    bool     feof;
    w_rc_t    rc;
    pin_i*    pin;
    nbox_t     key;
    vec_t     elem;
    smsize_t     klen, elen;
    rid_t    rid;
    int     i=0;
    while ( !(rc=scanf.next(pin, 0, feof)).is_error() && !feof ) {
    i++;

    smsize_t ridoffset = pin->body_size() - sizeof(rid_t);
    klen = ridoffset - keyoffset;

    smsize_t offset = keyoffset;

    /* Get key from file record */
    while(pin->start_byte()+pin->length() <= offset){ 
        rc = pin->next_bytes(feof); 
        if(rc.is_error()) {
        DBG(<<"rc=" << rc);
        return RC_AUGMENT(rc);
        }
        w_assert3(!feof);
    }
    offset -=  pin->start_byte();
    // not handling logical case...
        smsize_t amt = pin->length() - offset;
        DBG(<<"offset=" <<offset << " amt=" << amt);
        memcpy(&stringbuffer, pin->body() + offset, amt);

    if(offset + klen > pin->length()) {
        rc = pin->next_bytes(feof); 
        if(rc.is_error()) {
        DBG(<<"rc=" << rc);
        return RC_AUGMENT(rc);
        }
Ejemplo n.º 16
0
int SearchableHeap<T, Cmp>::Search(int i, const T& t)
{
    w_assert3(HeapProperty(0));
    DBGTHRD(<<"Search starting at " << i
    << ", numElements=" << numElements
    );
    if(i > numElements-1) return -1;

    int parent = i; // root
    // First, check sibling of parent if parent != root
    if (parent >0 && (RightSibling(parent) < numElements))  {
        DBGTHRD(<<"check right sibling: " << RightSibling(parent));
        if (cmp.ge(elements[RightSibling(parent)], t)) {
            return RightSibling(parent);
        }
    }
Ejemplo n.º 17
0
/*
 * Clean up latch bookkeeping for a thread that is being destroyed.
 *
 * `who` is erased from holder_list_list under its lock, and every node
 * on latch_holder_t::thread_local_freelist is deleted.  In debug builds
 * it is asserted that latch_holder_t::thread_local_holders is empty.
 */
void latch_t::on_thread_destroy(sthread_t *who)
{
    {
       CRITICAL_SECTION(cs, holder_list_list_lock);
       holder_list_list.erase(who);
    }

    w_assert3(!latch_holder_t::thread_local_holders);

    // Delete the freelist node by node, then mark it empty.
    for (latch_holder_t* cur = latch_holder_t::thread_local_freelist; cur; ) {
        latch_holder_t* const following = cur->_next;
        delete cur;
        cur = following;
    }
    latch_holder_t::thread_local_freelist = NULL;
}
Ejemplo n.º 18
0
rc_t
ss_m::_bulkld_index(
    const stid_t&         stid,
    int                   nsrcs,
    const stid_t*         source,
    sm_du_stats_t&        _stats,
    bool                  sort_duplicates, //  = true
    bool                  lexify_keys //  = true
    )
{
    sdesc_t* sd;
    W_DO( dir->access(t_index, stid, sd, EX ) );

    if (sd->sinfo().stype != t_index)   return RC(eBADSTORETYPE);
    switch (sd->sinfo().ntype) {
    case t_btree:
    case t_uni_btree:
        DBG(<<"bulk loading root " << sd->root());
        W_DO( bt->bulk_load(sd->root(), 
            nsrcs,
            source,
            sd->sinfo().nkc, sd->sinfo().kc,
            sd->sinfo().ntype == t_uni_btree, 
            (concurrency_t)sd->sinfo().cc,
            _stats.btree,
            sort_duplicates,
            lexify_keys
            ) );
        break;
    default:
        return RC(eBADNDXTYPE);
    }
    {
        store_flag_t st;
        W_DO( io->get_store_flags(stid, st) );
        w_assert3(st != st_bad);
        if(st & (st_tmp|st_insert_file|st_load_file)) {
            DBG(<<"converting stid " << stid <<
                " from " << st << " to st_regular " );
            // After bulk load, it MUST be re-converted
            // to regular to prevent unlogged arbitrary inserts
            // Invalidate the pages so the store flags get reset
            // when the pages are read back in
            W_DO( io->set_store_flags(stid, st_regular) );
        }
    }
Ejemplo n.º 19
0
/*
 * Fill this indirect tag with all the pages of the chunk-based tag
 * `old_tag`.
 *
 * The page ids are read from old_tag in batches of at most max_pages and
 * each batch is handed to append().  The indirect root must not be set
 * yet (debug assertion).  Returns the first error from append(), or RCOK.
 */
rc_t
lg_tag_indirect_h::convert(const lg_tag_chunks_h& old_tag)
{
    FUNC(lg_tag_indirect_h::convert);
    const smsize_t max_pages = 64;  // batch size: page ids per append() call
    w_assert3(_iref.indirect_root == 0);

    lpid_t    page_list[max_pages];

    smsize_t old_cnt = old_tag.page_count();

    // The member _page_cnt doubles as the loop counter: it is the index in
    // old_tag of the first page of the current batch.
    // NOTE(review): the loop steps _page_cnt by max_pages, so on exit it is a
    // multiple of max_pages rather than old_cnt unless append() also adjusts
    // it -- confirm against append().
    for (_page_cnt = 0; _page_cnt < old_cnt; _page_cnt += max_pages) {
        uint4_t num_pages;
        // The last batch may be shorter than max_pages.
        uint4_t maxpgs = MIN(uint4_t(max_pages), old_cnt - _page_cnt);
        for (num_pages = 0; num_pages < maxpgs; num_pages++ ) {
                page_list[num_pages] = old_tag.pid(_page_cnt+num_pages);
        }
        // num_pages == maxpgs here: the number of ids in this batch.
        W_DO(append(num_pages, page_list));
    }
    return RCOK;
}
Ejemplo n.º 20
0
/*
 * Unlink `entry` from this transaction's doubly-linked list of lock
 * entries (_head/_tail), remove it from the private hashmap, and hand it
 * back to xctLockEntryPool.
 *
 * In debug builds (W_DEBUG_LEVEL >= 3) the list is walked first to check
 * that `entry` really is on it.
 */
void xct_lock_info_t::remove_request (xct_lock_entry_t *entry) {
#if W_DEBUG_LEVEL>=3
    // Walk from the head until we hit the entry or run off the end.
    xct_lock_entry_t *scan = _head;
    while (scan != NULL && scan != entry) {
        scan = scan->next;
    }
    w_assert3(scan != NULL);
#endif //W_DEBUG_LEVEL>=3

    xct_lock_entry_t *const before = entry->prev;
    xct_lock_entry_t *const after  = entry->next;

    // Fix up the link coming from the predecessor side.
    if (before != NULL) {
        w_assert1(before->next == entry);
        before->next = after;
    } else {
        // no predecessor: it should be the current head
        w_assert1(_head == entry);
        _head = after;
        if (_head != NULL) {
            _head->prev = NULL;
        }
    }

    // Fix up the link coming from the successor side.
    if (after != NULL) {
        // If entry was the head, `after` has just become the head and its
        // prev link has already been cleared above.
        w_assert1(after == _head || after->prev == entry);
        after->prev = before;
    } else {
        // no successor: it should be the current tail
        w_assert1(_tail == entry);
        _tail = before;
        if (_tail != NULL) {
            _tail->next = NULL;
        }
    }

    _hashmap.remove(entry); //removes from private hashmap, too

    xctLockEntryPool->destroy_object(entry);
}
Ejemplo n.º 21
0
/*
 * Advance the cursor to its next record.
 *
 * On the first call the starting position is located first.  Reaching the
 * end of the scan is not an error: RCOK is returned, and when the end is
 * detected while stepping, the cursor is closed.  Errors from the helper
 * calls are returned to the caller.
 */
rc_t bt_cursor_t::next()
{
    if (!is_valid()) {
        return RCOK; // EOF
    }

    if (_first_time) {
        _first_time = false;
        W_DO(_locate_first ());
        if (_eof) {
            return RCOK;
        }
    }

    // Get back onto the page that holds the current key.
    w_assert3(_pid);
    btree_page_h leaf;
    W_DO(_refix_current_key(leaf));
    w_assert3(leaf.is_fixed());
    w_assert3(leaf.pid() == _pid);

    W_DO(_check_page_update(leaf));

    // Step one slot to the right (to the left for a backward scan).
    bool hit_end = false;
    W_DO(_find_next(leaf, hit_end));
    if (hit_end) {
        close();
        return RCOK;
    }

    w_assert3(leaf.is_fixed());
    w_assert3(leaf.is_leaf());
    w_assert3(_slot >= 0);
    w_assert3(_slot < leaf.nrecs());

    // Load the values of the slot we are now positioned on.
    W_DO( _make_rec(leaf) );
    return RCOK;
}
Ejemplo n.º 22
0
/* 
 * truncate() removes pages at the end of large records
 * implemented as a set of chunks.
 *
 * num_pages is the number of trailing pages to free; it must not exceed
 * page_count() (checked by a debug assertion).  The pages are freed
 * first, then the chunk table is trimmed from the last chunk backwards.
 * Returns the first error from free_page(), or RCOK.
 *
 * The page range is walked as the half-open interval
 * [page_count()-num_pages, page_count()).  The earlier inclusive upper
 * bound, page_count()-1, wraps around when the record has no pages and
 * smsize_t is unsigned, which turns the loop into a runaway.
 */
rc_t
lg_tag_chunks_h::truncate(uint4_t num_pages)
{
    FUNC(lg_tag_chunks_h::truncate);

    if (num_pages == 0) {
        // Nothing to free and nothing to trim.
        return RCOK;
    }

    const smsize_t total_pages = page_count();
    w_assert3(num_pages <= total_pages);
    const smsize_t first_dealloc = total_pages - num_pages;
#if W_DEBUG_LEVEL > 2
    uint4_t check_dealloc = 0;
#endif 

    for (smsize_t pg = first_dealloc; pg < total_pages; pg++) {
        DBG(<<"freeing page " << pid(pg));
        W_DO(smlevel_0::io->free_page(pid(pg)));
#if W_DEBUG_LEVEL > 2
        check_dealloc++;
#endif 
    }
    w_assert3(check_dealloc == num_pages);

    // Trim the chunk table, starting with the last chunk.
    for (int i = _cref.chunk_cnt-1; i >= 0 && num_pages > 0; i--) {

        if (_cref.chunks[i].npages <= num_pages) {
            num_pages -= _cref.chunks[i].npages;
            _cref.chunk_cnt--; // this chunk is not needed
        } else {
            // Only part of this chunk goes away; it absorbs the remainder.
            _cref.chunks[i].npages -= num_pages;
            num_pages = 0;
        }
    }

    w_assert9(num_pages == 0);
    return RCOK;
}
Ejemplo n.º 23
0
void                        
bf_prefetch_thread_t::retire() 
{ 
    FUNC(bf_prefetch_thread_t::retire);
    {
        CRITICAL_SECTION(cs, _prefetch_mutex);
        _retire = true; 
    } // end critical section

    w_assert3( me() != this );

    w_rc_t e;
    for (;;) {
        /* keep hosing the thread until it dies */
            /* XXX This is bogus. telling it to shutdown and waiting
               should be enough. */
        DO_PTHREAD(pthread_cond_signal(&_activate));
        e = join(1000);
        if (!e.is_error())
            break;
        else if (e.err_num() != smthread_t::stTIMEOUT)
            W_COERCE(e);
    }        
}
Ejemplo n.º 24
0
/*
 * Position the cursor for the first call to next().
 *
 * Takes the intent lock on the store (if locking is enabled), detects an
 * empty key range, then traverses to the leaf whose fence keys contain the
 * starting key (_lower for a forward scan, _upper for a backward scan)
 * and searches it.  The outcome is recorded in _slot, _key and
 * _dont_move_next, and a key lock in the matching mode is taken when
 * locking is enabled and the mode is not empty.
 *
 * The whole traverse/search/lock sequence is repeated whenever the key
 * lock attempt returns eLOCKRETRY.  Any other error is returned.
 * Sets _eof and returns RCOK when the requested range is empty.
 */
rc_t bt_cursor_t::_locate_first() {
    // at the first access, we get an intent lock on store/volume
    if (_needs_lock) {
        W_DO(smlevel_0::lm->intent_store_lock(_store, _ex_lock ? okvl_mode::IX : okvl_mode::IS));
    }

    // Empty range: bounds are crossed, or they are equal and at least one
    // of them is exclusive.
    if (_lower > _upper || (_lower == _upper && (!_lower_inclusive || !_upper_inclusive))) {
        _eof = true;
        return RCOK;
    }

    // loop because btree_impl::_ux_lock_key might return eLOCKRETRY
    while (true) {
        // find the leaf (potentially) containing the key
        const w_keystr_t &key = _forward ? _lower : _upper;
        btree_page_h leaf;
        bool found = false;
        W_DO( btree_impl::_ux_traverse(_store, key, btree_impl::t_fence_contain, LATCH_SH, leaf));
        w_assert3 (leaf.fence_contains(key));
        _set_current_page(leaf);

        w_assert1(leaf.is_fixed());
        w_assert1(leaf.is_leaf());

        // then find the tuple in the page
        leaf.search(key, found, _slot);

        // Lock mode to request on _key; every branch below assigns it.
        const okvl_mode *mode = NULL;
        if (found) {
            // exact match!
            _key = key;
            if (_forward) {
                if (_lower_inclusive) {
                    // let's take range lock too to reduce lock manager calls
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                    _dont_move_next = true;
                } else {
                    mode = _ex_lock ? &ALL_N_GAP_X : &ALL_N_GAP_S;
                    _dont_move_next = false;
                }
            } else {
                // in backward case we definitely don't need the range part
                if (_upper_inclusive) {
                    mode = _ex_lock ? &ALL_X_GAP_N : &ALL_S_GAP_N;
                    _dont_move_next = true;
                } else {
                    // in this case, we don't need lock at all
                    mode = &ALL_N_GAP_N;
                    _dont_move_next = false;
                    // only in this case, _key might disappear. otherwise,
                    // _key will exist at least as a ghost entry.
                }
            }
        } else {
            // key not found. and search_leaf returns the slot the key will be inserted.
            // in other words, val(slot - 1) < key < val(slot).
            w_assert1(_slot >= 0);
            w_assert1(_slot <= leaf.nrecs());

            if (_forward) {
                --_slot; // subsequent next() will read the slot
                if (_slot == -1) {
                    // we are hitting the left-most of the page. (note: found=false)
                    // then, we take lock on the fence-low key
                    _dont_move_next = false;
                    leaf.copy_fence_low_key(_key);
                } else {
                    _dont_move_next = false;
                    leaf.get_key(_slot, _key);
                }
                mode = _ex_lock ? &ALL_N_GAP_X : &ALL_N_GAP_S;
            } else {
                // subsequent next() will read the previous slot
                --_slot;
                if (_slot == -1) {
                    // then, we need to move to even more previous slot in previous page
                    _dont_move_next = false;
                    leaf.copy_fence_low_key(_key);
                    mode = _ex_lock ? &ALL_N_GAP_X : &ALL_N_GAP_S;
                } else {
                    _dont_move_next = true;
                    leaf.get_key(_slot, _key);
                    // let's take range lock too to reduce lock manager calls
                    mode = _ex_lock ? &ALL_X_GAP_X : &ALL_S_GAP_S;
                }
            }
        }
        // Take the key lock unless locking is off or the chosen mode is empty.
        if (_needs_lock && !mode->is_empty()) {
            rc_t rc = btree_impl::_ux_lock_key (_store, leaf, _key, LATCH_SH, *mode, false);
            if (rc.is_error()) {
                if (rc.err_num() == eLOCKRETRY) {
                    // start over: traverse and search again
                    continue;
                } else {
                    return rc;
                }
            }
        }
        break;
    }
    return RCOK;
}
Ejemplo n.º 25
0
/*
 * Convert a key from its lexicographic (byte-comparable) form back into
 * the native representation.
 *
 *   kp   describes the key type
 *   str  the lexified key
 *   res  receives the native value; in debug builds its alignment is
 *        checked for every type wider than one byte
 *
 * Returns false for key types that cannot be converted (kt_nosuch,
 * kt_spatial, and variable-length kt_b); true otherwise.
 */
bool 
sortorder::unlexify(
    const key_type_s  *kp,
    const void *str, 
    void *res
) 
{
    FUNC(unlexify);
    keytype k = convert(kp);
    DBG(<<" k=" << int(k));

    switch (k) {
    case kt_nosuch:
    case kt_spatial:
        return false;

    /* Integers: signed and unsigned differ only in the sign flag. */
    case kt_i1:
    case kt_u1:
        int_unlexify(str, k == kt_i1, 1, res, I1perm);
        break;

    case kt_i2:
    case kt_u2:
        /* XXX why aren't the alignment tools used for all of these? */
        w_assert3(((ptrdiff_t)res & ALIGN_MASK_IU2) == 0x0);
        int_unlexify(str, k == kt_i2, 2, res, I2perm);
        break;

    case kt_i4:
    case kt_u4:
        w_assert3(((ptrdiff_t)res & ALIGN_MASK_IU4) == 0x0);
        int_unlexify(str, k == kt_i4, 4, res, I4perm);
        break;

    case kt_i8:
    case kt_u8:
        w_assert3(((ptrdiff_t)res & ALIGN_MASK_IU8) == 0x0);
        int_unlexify(str, k == kt_i8, 8, res, I8perm);
        break;

    /* Floating point */
    case kt_f4:
        // should be at least 4-byte aligned
        w_assert3(((ptrdiff_t)res & ALIGN_MASK_F4) == 0x0);
        float_unlexify(str, Fperm, (f4_t *)res);
        break;

    case kt_f8:
        // should be at least 4-byte aligned; architectures'
        // alignment requirements for doubles might differ.
        w_assert3(((ptrdiff_t)res & ALIGN_MASK_F8) == 0x0);
        dbl_unlexify(str, Dperm, (f8_t *)res);
        break;

    /* Byte strings: only the fixed-length kind is copied back. */
    case kt_b:
        if (kp->variable) {
            return false;
        }
        memcpy(res, str, kp->length);
        break;
    }
    return true;
}
Ejemplo n.º 26
0
/*
 * get_key_info: build the sort key for key number `cookie.make_int()`
 * of the object `obj_in`.
 *
 * The object's header (obj_in.hdr(0)) is treated as an array of
 * `metadata` entries, one per key; entry k supplies the offset and
 * length of the key within the object.  The key is constructed in place
 * in the storage that `key` points to.
 *
 * The rid parameter is only named when both W_IFDEBUG3 and W_IFTRACE
 * keep their argument; the factory_t parameter is unnamed and unused in
 * the code shown here.
 */
w_rc_t 
get_key_info(
    const rid_t&   W_IFDEBUG3(W_IFTRACE(rid)),  // record id
    const object_t&        obj_in,
    key_cookie_t            cookie,  // type info
    factory_t&                ,
    skey_t*                key
)
{
    // The cookie carries the index of the key being extracted.
    int                        k = cookie.make_int();
    DBG(<<"get_key_info for key " << k);
    // Per-key metadata array stored at the start of the object header.
    metadata*                meta = (metadata *)obj_in.hdr(0);

    // We should only be called for keys that are nullable or not fixed.
    w_assert1(meta[k].nullable || !meta[k].fixed); 

    DBG(<<"get_key_info for keys " << k << " offset=" 
            << meta[k].offset << " length=" << meta[k].length );

    // Placement-new: construct the skey_t in the caller-supplied storage.
    new(key) skey_t(obj_in, meta[k].offset, meta[k].length, false);
#if W_DEBUG_LEVEL > 2
    // Debug-only dump of the key's metadata and contents.
    if(1) 
    {
// From here on DBG writes unconditionally to cout.
#undef DBG
#define DBG(x) cout x << endl;
        DBG(
        << "KEY " << k << " METADATA ARE: " 
        << " offset=" << meta[k].offset
        << " length=" << meta[k].length
        );

        DBG( << rid << " body (" 
            << (obj_in.body_size() - meta[k].offset) 
            << " bytes worth) = " );
        // Start of this key's bytes within the object body.
        char *object = (char *)obj_in.body(meta[k].offset);

        if(meta[k].length > 0) {
        switch(meta[k].t) {
        case test_bv:
        case test_blarge:
        case test_b23:
        case test_b1: {
                    // Byte-string keys: printed as text.
                    int  l = meta[k].length;
                    char *p = object;
                    if(l > 1) {
                        // For strings longer than 256 characters, skip a
                        // leading run of 'E' so it can be summarized.
                        if(strlen(p) > 256) {
                            while (*p == 'E') p++; 
                        }
                        if(p-object > 0) {
                            // Print the run length followed by the rest.
                            DBG(<<" E(" <<(int)(p-object) <<" times)" << p);
                        } else {
                            // Check every character is within
                            // [lower_alpha, upper_alpha], then print the
                            // whole string.
                            while(*p) {
                                w_assert3(*p <= upper_alpha && *p >= lower_alpha);
                                p++;
                            }
                            DBG( << object);
                        }
                    } else {
                        // print as character
                        w_assert3(*p <= upper_alpha && *p >= lower_alpha);
                        DBG( << *object);
                    }
                } 
Ejemplo n.º 27
0
/*
 * Entry point of the smsh shell.
 *
 * Registers the smsh error codes, builds the option group, reads the
 * default option files and the command line, handles the smsh-specific
 * getopt() flags, and then runs a single tcl_thread_t to completion.
 *
 * Flags handled by getopt():
 *      -C          force compression of btrees
 *      -f <file>   source <file> instead of reading commands from stdin
 *      -h          print the usage message and exit
 *      -L          use the log warning callback
 *      -O          force use of the old sort implementation
 *      -s          print thread and Unix statistics before exiting
 *      -T          turn on logtrace
 *      -v          print the option values
 *      -V          verbose
 *
 * Returns 0 once the tcl thread has finished (or after -h), and 1 if
 * option processing failed.
 */
int main(int argc, const char** argv)
{
    argv0 = argv[0];

    bool print_stats = false;

    U.start();

    // Set up smsh related error codes
    if (! (w_error_t::insert(
                "ss_m shell",
                smsh_error_list, SSH_MAX_ERROR - SSH_MIN_ERROR - 1))) {
        abort();
    }

    /*
     * The following section of code sets up all the various options
     * for the program.  The following steps are performed:
        - determine the name of the program
        - set up an option group for the program
        - initialize the ssm options
        - scan default option configuration files ($HOME/.shoreconfig .shoreconfig)
        - process any options found on the command line
        - use getopt() to process smsh specific flags on the command line
        - check that all required options are set before initializing sm
     */

    // Set prog_name to the file name of the program: the part of
    // argv[0] after the last '/', or all of argv[0] if there is no '/'
    // or nothing follows it.
    const char* prog_name = strrchr(argv[0], '/');
    if (prog_name == NULL) {
        prog_name = argv[0];
    } else {
        prog_name += 1; /* skip the '/' */
        if (prog_name[0] == '\0')  {
            prog_name = argv[0];
        }
    }

    /*
     * Set up an option group (list of options) for use by
     * all layers of the system.  Level "smsh" indicates
     * that the program is a part of the smsh test suite.
     * Level "server" indicates
     * the type of program (the smsh server program).  The third
     * level is the program name itself.
     */
    option_group_t options(3);
    W_COERCE(options.add_class_level("smsh"));
    W_COERCE(options.add_class_level("server"));
    W_COERCE(options.add_class_level(prog_name));

    /*
     * Set up the smsh options for the name of the tcl library directory
     * and the name of the .smshrc file.
     */
    option_t* smsh_libdir;
    option_t* smsh_smshrc;
    W_COERCE(options.add_option("smsh_libdir", "directory name", NULL,
                "directory for smsh tcl libraries",
                true, option_t::set_value_charstr, smsh_libdir));
    W_COERCE(options.add_option("smsh_smshrc", "rc file name", ".smshrc",
                "full path name of the .smshrc file",
                false, option_t::set_value_charstr, smsh_smshrc));

    // have the sm add its options to the group
    W_COERCE(ss_m::setup_options(&options));

    /*
     * Scan the default configuration files: $HOME/.shoreconfig, then
     * ./.shoreconfig.  Note that OS errors are ignored since it is not
     * an error for either file to be missing.
     */
    rc_t        rc;
    {
    char                opt_file[ss_m::max_devname+1];
    for(int file_num = 0; file_num < 2 && !rc.is_error(); file_num++) {
        // scan default option files
        w_ostrstream        err_stream;
        const char*        config = ".shoreconfig";
        if (file_num == 0) {
            const char* home = getenv("HOME");
            if (!home) {
                // $HOME is not set, so there is no per-user file to
                // read.  That is not an error; skip to the file in the
                // current directory instead of leaving the loop, which
                // would silently drop ./.shoreconfig as well.
                continue;
            }
            if (sizeof(opt_file) <= strlen(home) + strlen("/") + strlen(config) + 1) {
                cerr << "Error: environment variable $HOME is too long" << endl;
                rc = RC(SSH_FAILURE);
                break;
            }
            strcpy(opt_file, home);
            strcat(opt_file, "/");
            strcat(opt_file, config);
        } else {
            w_assert3(file_num == 1);
            strcpy(opt_file, "./");
            strcat(opt_file, config);
        }
        {
            option_file_scan_t opt_scan(opt_file, &options);
            rc = opt_scan.scan(true, err_stream);
            err_stream << ends;
            if (rc.is_error()) {
                // ignore OS error messages
                if (rc.err_num() == fcOS) {
                    rc = RCOK;
                } else {
                    // this error message is kind of gross but is
                    // sufficient for now
                    cerr << "Error in reading option file: " << opt_file << endl;
                    //cerr << "\t" << w_error_t::error_string(rc.err_num()) << endl;
                    cerr << "\t" << err_stream.c_str() << endl;
                }
            }
        }
    }
    }

    /*
     * Assuming there has been no error so far, the command line
     * is processed for any options in the option group "options".
     */
    if (!rc.is_error()) {
        // parse command line
        w_ostrstream        err_stream;
        rc = options.parse_command_line(argv, argc, 2, &err_stream);
        err_stream << ends;
        if (rc.is_error()) {
            cerr << "Error on command line " << endl;
            cerr << "\t" << w_error_t::error_string(rc.err_num()) << endl;
            cerr << "\t" << err_stream.c_str() << endl;
            print_usage(cerr, prog_name, false, options);
        }
    }

    /*
     * Process the smsh specific flags.  This is done even if an error
     * has already been recorded, so that "smsh -h" is still recognized.
     */
    int option;
    {
        bool verbose_opt = false; // print verbose option values
        while ((option = getopt(argc, (char * const*) argv, "Cf:hLOsTvV")) != -1) {
            switch (option) {
            case 'T':
                extern bool logtrace;
                logtrace = true;
                break;

            case 'O':
                // Force use of old sort
                cout << "Force use of old sort implementation." <<endl;
                newsort = false;
                break;

            case 'C':
                // force compression of btrees
                force_compress = true;
                break;

            case 's':
                print_stats = true;
                break;

            case 'f':
                f_arg = optarg;
                break;

            case 'L':
                // use log warning callback
                log_warn_callback = true;
                break;

            case 'h':
                // print a help message describing options and flags
                print_usage(cerr, prog_name, true, options);
                // free rc structure to avoid complaints on exit
                W_IGNORE(rc);
                goto done;

            case 'v':
                verbose_opt = true;
                break;

            case 'V':
                verbose = true;
                break;

            default:
                // Every letter in the option string has its own case,
                // so this is reached only when getopt() returns '?'.
                // The rejected character is in optopt; printing
                // `option` itself would only show the code of '?'.
                cerr << "unknown flag: "
                     << static_cast<char>(optopt ? optopt : option) << endl;
                rc = RC(SSH_COMMAND_LINE);
                break;
            }
        }

        if (verbose_opt) {
            options.print_values(false, cerr);
        }
    }

    /*
     * Assuming no error so far, check that all required options
     * in option_group_t options are set.
     */
    if (!rc.is_error()) {
        // check required options
        w_ostrstream        err_stream;
        rc = options.check_required(&err_stream);
        err_stream << ends;
        if (rc.is_error()) {
            cerr << "These required options are not set:" << endl;
            cerr << err_stream.c_str() << endl;
            print_usage(cerr, prog_name, false, options);
        }
    }

    /*
     * If there have been any problems so far, then exit
     */
    if (rc.is_error()) {
        // free the rc error structure to avoid complaints on exit
        W_IGNORE(rc);
        goto errordone;
    }

    /*
     * At this point, all options and flags have been properly
     * set.  What follows is initialization for the rest of
     * the program.  The ssm will be started by a tcl_thread.
     */

    // setup table of sm commands - doesn't involve the Tcl_Interp
    dispatch_init();

    // set up the linked variables
    // either these should be read-only or
    // they need to be made thread-safe.  We can assume for smsh they
    // are for all purposes read-only, since only the mama thread sets
    // them in the scripts.
    linked.sm_page_sz = ss_m::page_sz;
    linked.sm_max_exts = ss_m::max_exts;
    linked.sm_max_vols = ss_m::max_vols;
    linked.sm_max_servers = ss_m::max_servers;
    linked.sm_max_keycomp = ss_m::max_keycomp;
    linked.sm_max_dir_cache = ss_m::max_dir_cache;
    linked.sm_max_rec_len = ss_m::max_rec_len;
    linked.sm_srvid_map_sz = ss_m::srvid_map_sz;
    linked.verbose_flag = verbose?1:0;
    linked.verbose2_flag = verbose2?1:0;
    linked.instrument_flag = instrument?1:0;
    linked.compress_flag = force_compress?1:0;
    linked.log_warn_callback_flag = log_warn_callback?1:0;

    {
        // NOTE(review): "interactive" is set only when stdin is a tty
        // AND -f was given; confirm this is intended rather than
        // "tty && !f_arg".
        int tty = isatty(0);
        interactive = tty && f_arg;
    }

    // Create the main tcl_thread, run it to completion and destroy it.
    {
        tcl_thread_t* tcl_thread = NULL;

        if (f_arg) {
            // smsh -f <file>: hand the thread the command "source <file>"
            TCL_AV char* av[2];
            av[0] = TCL_AV1 "source";
            av[1] = f_arg;
            tcl_thread = new tcl_thread_t(2, av,
                            smsh_libdir->value(),
                            smsh_smshrc->value()
                            );
        } else {
            // interactive
            /*
            cerr << __func__ << " " << __LINE__ << " " << __FILE__
            << " INTERACTIVE libdir " << smsh_libdir->value()
            << " msshrc " << smsh_smshrc->value()
            << endl;
            */
            tcl_thread = new tcl_thread_t(0, 0,
                            smsh_libdir->value(),
                            smsh_smshrc->value()
                            );
        }
        assert(tcl_thread);

        W_COERCE( tcl_thread->fork() );
        W_COERCE( tcl_thread->join() );

        delete tcl_thread;
    }

    // Shutdown TCL and have it deallocate resources still held!
    Tcl_Finalize();

    U.stop(1); // 1 iteration

    if(print_stats)
    {
        cout << "Thread stats" <<endl;
        sthread_t::dump_stats(cout);
        cout << endl;

        cout << "Unix stats for parent:" <<endl;
        cout << U << endl << endl;

    }
    cout << flush;

done:
    // Normal exit; also the target of -h.
    clean_up_shell();
    fprintf(stderr, "%d tcl threads ran\n", num_tcl_threads_ttl);
    return 0;

errordone:
    // Option processing failed.
    clean_up_shell();
    return 1;
}