/*
BIG FAT WARNING:

This routine should NEVER be used like this:

  ham_txn_t txn;
  txn_begin(&txn, env, 0);
  ...
  txn_commit/abort(&txn);

in any (C/C++) environment where the code in the '...' may trigger out-of-band jumps, such as longjmp()
to an outer layer or a C++ exception: the transaction 'txn' is bound to the environment ('env') structure
internally, and this causes a CORE DUMP once that structure is closed (and cleaned up), because by then,
in the outer layer exception handler, the 'txn' stack space will have been NUKED.

This shortcutting style of coding was used throughout the unittests and it was waiting for the axe to fall...

It is also used within the hamsterdb C code itself, which is perfectly fine as this library does not
call any exception throwing code... UNLESS OF COURSE such sort of code is to be found in ANY of the
registered hooks/callbacks!

Hence any callbacks which get registered with hamsterDB should NEVER allow any C longjmp() or C++ exception
to pass /through/ the hamsterdb layer itself, or a core dump at ham_close/ham_env_close invocation
will be your share.
*/
/**
 * Begin a transaction on 'env', using the caller-provided storage 'txn'.
 *
 * 'txn' is zeroed, bound to 'env', assigned the next transaction id
 * (the environment's current id plus one) and registered as the
 * environment's active transaction. If the environment has a log and
 * HAM_TXN_READ_ONLY is not set in 'flags', a txn-begin entry is appended
 * to that log.
 *
 * Returns 0 on success, HAM_LIMITS_REACHED if the environment already has
 * an active transaction, or the status of ham_log_append_txn_begin() if
 * writing the log entry failed.
 *
 * On a log failure the transaction is unregistered from 'env' again, so
 * the environment never keeps a pointer to storage of a transaction that
 * was not started. The transaction id stays consumed in that case.
 */
ham_status_t
txn_begin(ham_txn_t *txn, ham_env_t *env, ham_u32_t flags)
{
    ham_status_t st=0;

    /* for hamsterdb 1.0.4 - only support one transaction */
    if (env_get_txn(env)) {
        ham_trace(("only one concurrent transaction is supported"));
        return (HAM_LIMITS_REACHED);
    }

    memset(txn, 0, sizeof(*txn));
    txn_set_env(txn, env);
    txn_set_id(txn, env_get_txn_id(env)+1);
    txn_set_flags(txn, flags);
    env_set_txn(env, txn);
    env_set_txn_id(env, txn_get_id(txn));

    if (env_get_log(env) && !(flags&HAM_TXN_READ_ONLY)) {
        st=ham_log_append_txn_begin(env_get_log(env), txn);
        if (st) {
            /*
             * the transaction did not start: detach it from the
             * environment, otherwise 'env' keeps pointing at the caller's
             * (possibly stack-based) 'txn' and every later txn_begin()
             * fails with HAM_LIMITS_REACHED
             */
            env_set_txn(env, 0);
        }
    }

    return st;
}
Example #2
0
/**
 * write a series of data chunks to storage at file offset 'addr'.
 * 
 * The chunks are assumed to be stored in sequential order, adjacent
 * to each other, i.e. as one long data strip.
 * 
 * Writing is performed on a per-page basis, where special conditions
 * will decide whether or not the write operation is performed
 * through the page cache or directly to device; such is determined 
 * on a per-page basis.
 *
 * Parameters:
 *  env             - the environment; supplies the device, the page size,
 *                    the log and the active transaction
 *  page            - optional page to start with (may be 0); it is only
 *                    used while its address equals the page-ID currently
 *                    being written, otherwise it is dropped and the page
 *                    is looked up again
 *  addr            - file offset at which the first chunk starts
 *  allocated       - non-zero if the caller allocated this storage; must
 *                    be set whenever 'freshly_created' is set (asserted)
 *  freshly_created - non-zero if the pages were freshly created; this
 *                    allows a cache-only lookup even when a log is present
 *  chunk_data      - array of 'chunks' pointers to the chunk payloads
 *  chunk_size      - array of 'chunks' sizes, one per chunk
 *  chunks          - number of entries in chunk_data/chunk_size
 *
 * NOTE: chunk_data[] and chunk_size[] are MODIFIED in place: each pointer
 * is advanced and each size is decremented as data is written, so on
 * success every chunk_size[i] is 0.
 *
 * Returns 0 on success; otherwise the first non-zero status returned by
 * env_fetch_page(), ham_log_add_page_before() or the device's write().
 */
static ham_status_t
__write_chunks(ham_env_t *env, ham_page_t *page, ham_offset_t addr, 
        ham_bool_t allocated, ham_bool_t freshly_created, 
        ham_u8_t **chunk_data, ham_size_t *chunk_size, 
        ham_size_t chunks)
{
    ham_size_t i;
    ham_status_t st;
    ham_offset_t pageid;
    ham_device_t *device=env_get_device(env);
	ham_size_t pagesize = env_get_pagesize(env);

    /* 'freshly_created' implies 'allocated' */
    ham_assert(freshly_created ? allocated : 1, (0));

    /*
     * for each chunk...
     */
    for (i=0; i<chunks; i++) {
        /* ... write page-sized (or smaller) pieces until it is used up */
        while (chunk_size[i]) {
            /*
             * get the page-ID from this chunk: the address of the page
             * which contains 'addr'
             */
            pageid = addr - (addr % pagesize);

            /*
             * is this the current page? if not, forget about it
             */
            if (page && page_get_self(page)!=pageid)
                page=0;

            /*
             * fetch the page from the cache, if it's in the cache
             * (unless we're logging - in this case always go through
             * the buffered routines)
             */
            if (!page) {
                /*
                 * keep pages in cache when they are located at the 'edges' of 
                 * the blob, as they MAY be accessed for different data.
                 * Of course, when a blob is small, there's only one (partial) 
                 * page accessed anyhow, so that one should end up in cache 
                 * then.
                 *
                 * When transaction logging is turned on, it's the same story, 
                 * really. We _could_ keep all those pages in cache now,
                 * but this would be thrashing the cache with blob data that's 
                 * accessed once only and for transaction abort (or commit)
                 * the amount of effort does not change.
                 *
                 * THOUGHT:
                 *
                 * Do we actually care what was in that page, which is going 
                 * to be overwritten in its entirety, BEFORE we do this, i.e. 
                 * before the transaction? 
                 *
                 * Answer: NO (and YES in special circumstances).
                 *
                 * Elaboration: As this would have been free space before, the 
                 * actual content does not matter, so it's not required to add
                 * the FULL pages written by the blob write action here to the 
                 * transaction log: even on transaction abort, that lingering 
                 * data is marked as 'bogus'/free as it was before anyhow.
                 *
                 * And then, assuming a longer running transaction, where this 
                 * page was freed during a previous action WITHIN
                 * the transaction, well, then the transaction log should 
                 * already carry this page's previous content as instructed 
                 * by the erase operation. HOWEVER, the erase operation would 
                 * not have a particular NEED to edit this page, as an erase op 
                 * is complete by just marking this space as free in the 
                 * freelist, resulting in the freelist pages (and the btree 
                 * pages) being the only ones being edited and ending up in 
                 * the transaction log then.
                 *
                 * Which means we'll have to log the previous content of these 
                 * pages to the transaction log anyhow. UNLESS, that is, when
                 * WE allocated these pages in the first place: then there 
                 * cannot be any 'pre-transaction' state of these pages 
                 * except that of 'not existing', i.e. 'free'. In which case, 
                 * their actual content doesn't matter! (freshly_created)
                 *
                 * And what if we have recovery logging turned on, but it's 
                 * not about an active transaction here?
                 * In that case, the recovery log would only log the OLD page 
                 * content, which we've concluded is insignificant, ever. Of 
                 * course, that's assuming (again!) that we're writing to 
                 * freshly created pages, which no-one has seen before. 
                 *
                 * Just as long as we can prevent this section from thrashing 
                 * the page cache, thank you very much...
                 *
                 * In code terms:
                 *  at_blob_edge - __blob_from_cache() says so, OR the write 
                 *                 does not start on a page boundary, OR 
                 *                 less than a full page remains in this 
                 *                 chunk
                 *  cacheonly    - not at a blob edge AND (no log present 
                 *                 OR the pages are freshly created)
                 */
                ham_bool_t at_blob_edge = (__blob_from_cache(env, chunk_size[i])
                        || (addr % pagesize) != 0 
                        || chunk_size[i] < pagesize);
                ham_bool_t cacheonly = (!at_blob_edge 
                                    && (!env_get_log(env)
                                        || freshly_created));
				//ham_assert(db_get_txn(db) ? !!env_get_log(env) : 1, (0));

                /*
                 * cacheonly: fetch with DB_ONLY_FROM_CACHE (presumably this 
                 * only returns the page if it is already cached - confirm 
                 * against env_fetch_page); 'page' may then remain 0 without 
                 * an error, in which case the data is written directly to 
                 * the device further below
                 */
                st=env_fetch_page(&page, env, pageid, 
                        cacheonly ? DB_ONLY_FROM_CACHE : 
                        at_blob_edge ? 0 : DB_NEW_PAGE_DOES_THRASH_CACHE);
				ham_assert(st ? !page : 1, (0));
                /* blob pages don't have a page header */
                if (page)
                {
                    page_set_npers_flags(page, 
                        page_get_npers_flags(page)|PAGE_NPERS_NO_HEADER);
                    /* if this page was recently allocated by the parent
                     * function: set a flag (the id of the active 
                     * transaction is stored in the page) */
                    if (cacheonly 
                            && allocated 
                            && addr==page_get_self(page) 
                            && env_get_txn(env))
                        page_set_alloc_txn_id(page, txn_get_id(env_get_txn(env)));
                }
                else if (st) {
                    return st;
                }
            }

            /*
             * if we have a page pointer: use it; otherwise write directly
             * to the device
             */
            if (page) {
                /* offset of 'addr' within the page */
                ham_size_t writestart=
                        (ham_size_t)(addr-page_get_self(page));
                /* bytes available up to the end of the page, limited to 
                 * what is left of this chunk */
                ham_size_t writesize =
                        (ham_size_t)(pagesize - writestart);
                if (writesize>chunk_size[i])
                    writesize=chunk_size[i];
                /* hand the page to the log before it is modified */
                if ((st=ham_log_add_page_before(page)))
                    return (st);
                memcpy(&page_get_raw_payload(page)[writestart], chunk_data[i],
                            writesize);
                page_set_dirty(page, env);
                /* advance the file offset and consume the written part of 
                 * the caller's chunk */
                addr+=writesize;
                chunk_data[i]+=writesize;
                chunk_size[i]-=writesize;
            }
            else {
                ham_size_t s = chunk_size[i];
                /* limit to the next page boundary */
                if (s > pageid+pagesize-addr)
                    s = (ham_size_t)(pageid+pagesize-addr);

                /* bypassing the cache while a log is present is only 
                 * expected for freshly created pages */
                ham_assert(env_get_log(env) ? freshly_created : 1, (0));

                st=device->write(device, addr, chunk_data[i], s);
                if (st)
                    return st;
                /* advance the file offset and consume the written part of 
                 * the caller's chunk */
                addr+=s;
                chunk_data[i]+=s;
                chunk_size[i]-=s;
            }
        }
    }

    return (0);
}
Example #3
0
/**
 * Insert a key/record pair into the btree, starting at the root page.
 *
 * The root page is fetched and __insert_recursive() is called on it. If
 * that call reports SPLIT, a new root page is allocated, the pivot
 * key/rid stored in the scratchpad is inserted into it, the old root
 * becomes its left child and is re-typed as an index page, and the
 * backend is flushed so the header reflects the new root.
 *
 * 'hints' must not request force_append or force_prepend (asserted).
 *
 * Returns the status of the insert: 0 on success, otherwise the first
 * error reported by one of the called routines.
 *
 * Any key data left in the scratchpad after the recursion is released on
 * EVERY exit path taken after the recursion, including the error exits
 * of the root-split handling.
 */
static ham_status_t
__insert_cursor(ham_btree_t *be, ham_key_t *key, ham_record_t *record, 
                ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st;
    ham_page_t *root;
    ham_db_t *db=be_get_db(be);
    ham_env_t *env = db_get_env(db);
    insert_scratchpad_t scratchpad;

    ham_assert(hints->force_append == HAM_FALSE, (0));
    ham_assert(hints->force_prepend == HAM_FALSE, (0));

    /* 
     * initialize the scratchpad 
     */
    memset(&scratchpad, 0, sizeof(scratchpad));
    scratchpad.be=be;
    scratchpad.record=record;
    scratchpad.cursor=cursor;

    /* 
     * get the root-page...
     */
    ham_assert(btree_get_rootpage(be)!=0, ("btree has no root page"));
    st=db_fetch_page(&root, db, btree_get_rootpage(be), 0);
    ham_assert(st ? root == NULL : 1, (0));
    if (st)
        return st; /* nothing to release yet: the scratchpad is still empty */

    /* 
     * ... and start the recursion 
     */
    st=__insert_recursive(root, key, 0, &scratchpad, hints);

    /*
     * if the root page was split, we have to create a new
     * root page.
     *
     * From here on every error leaves through 'cleanup', so that the
     * scratchpad key is never leaked.
     */
    if (st==SPLIT) {
        ham_page_t *newroot;
        btree_node_t *node;

        /*
         * the root-page will be changed...
         */
        st=ham_log_add_page_before(root);
        if (st)
            goto cleanup;

        /*
         * allocate a new root page
         */
        st=db_alloc_page(&newroot, db, PAGE_TYPE_B_ROOT, 0); 
        ham_assert(st ? newroot == NULL : 1, (0));
        if (st)
            goto cleanup;
        ham_assert(page_get_owner(newroot), (""));
        /* clear the node header */
        memset(page_get_payload(newroot), 0, sizeof(btree_node_t));

        stats_page_is_nuked(db, root, HAM_TRUE);

        /* 
         * insert the pivot element and the ptr_left
         */ 
        node=ham_page_get_btree_node(newroot);
        btree_node_set_ptr_left(node, btree_get_rootpage(be));
        st=__insert_nosplit(newroot, &scratchpad.key, 
                scratchpad.rid, scratchpad.record, scratchpad.cursor, 
                hints);
        ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
        scratchpad.cursor=0; /* don't overwrite cursor if __insert_nosplit
                                is called again */
        if (st)
            goto cleanup;

        /*
         * set the new root page
         *
         * !!
         * do NOT delete the old root page - it's still in use!
         *
         * also don't forget to flush the backend - otherwise the header
         * page of the database will not contain the updated information.
         * The backend is flushed when the database is closed, but if 
         * recovery is enabled then the flush here is critical.
         */
        btree_set_rootpage(be, page_get_self(newroot));
        be_set_dirty(be, HAM_TRUE);
        be->_fun_flush(be);

        /*
         * As we re-purpose a page, we will reset its pagecounter
         * as well to signal its first use as the new type assigned
         * here.
         */
        if (env_get_cache(env) && (page_get_type(root)!=PAGE_TYPE_B_INDEX))
            cache_update_page_access_counter(root, env_get_cache(env), 0);

        page_set_type(root, PAGE_TYPE_B_INDEX);
        page_set_dirty(root, env);
        page_set_dirty(newroot, env);

        /* the root page was modified (btree_set_rootpage) - make sure that
         * it's logged; a failure here is returned to the caller after the
         * common cleanup below */
        if (env_get_rt_flags(env)&HAM_ENABLE_RECOVERY) {
            st=txn_add_page(env_get_txn(env), env_get_header_page(env),
                    HAM_TRUE);
            if (st)
                goto cleanup;
        }
    }

cleanup:
    /*
     * release the scratchpad-memory and return to caller
     */
    ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad.key.data)
        allocator_free(env_get_allocator(env), scratchpad.key.data);

    return (st);
}