Exemplo n.º 1
0
/*
 * resolve a duplicate-table blob to an in-memory dupe_table_t pointer.
 *
 * @param table_ref receives the pointer to the duplicate table
 * @param page      receives the page holding the table, IF the whole table
 *                  fits in a single page (otherwise stays 0 and *table_ref
 *                  points to a heap buffer owned by the caller)
 * @param env       the environment
 * @param table_id  blob address (in-memory DBs: the raw blob pointer)
 *
 * @return HAM_SUCCESS or a propagated error code
 */
static ham_status_t
__get_duplicate_table(dupe_table_t **table_ref, ham_page_t **page, ham_env_t *env, ham_u64_t table_id)
{
    ham_status_t st;
    blob_t hdr;
    ham_page_t *hdrpage=0;
    dupe_table_t *table;

    *page = 0;

    /*
     * in-memory databases: table_id IS the blob pointer; the duplicate
     * table starts immediately after the blob header
     */
    if (env_get_rt_flags(env)&HAM_IN_MEMORY_DB) {
        ham_u8_t *p=(ham_u8_t *)U64_TO_PTR(table_id);
        *table_ref = (dupe_table_t *)(p+sizeof(hdr));
        return HAM_SUCCESS;
    }

    *table_ref = 0;

    /*
     * load the blob header
     */
    st=__read_chunk(env, 0, &hdrpage, table_id, (ham_u8_t *)&hdr, sizeof(hdr));
    if (st)
        return st;

    /*
     * if the whole table is in a page (and not split between several
     * pages), just return a pointer directly in the page
     */
    if (page_get_self(hdrpage)+env_get_usable_pagesize(env) >=
            table_id+blob_get_size(&hdr))
    {
        ham_u8_t *p=page_get_raw_payload(hdrpage);
        /* yes, table is in the page */
        *page=hdrpage;
        *table_ref = (dupe_table_t *)
                &p[table_id-page_get_self(hdrpage)+sizeof(hdr)];
        return HAM_SUCCESS;
    }

    /*
     * otherwise allocate memory for the table
     */
    table=allocator_alloc(env_get_allocator(env), (ham_size_t)blob_get_size(&hdr));
    if (!table)
        return HAM_OUT_OF_MEMORY;

    /*
     * then read the rest of the blob; on failure release the allocation
     * (bugfix: the buffer was previously leaked on this error path)
     */
    st=__read_chunk(env, hdrpage, 0, table_id+sizeof(hdr), 
            (ham_u8_t *)table, (ham_size_t)blob_get_size(&hdr));
    if (st) {
        allocator_free(env_get_allocator(env), table);
        return st;
    }

    *table_ref = table;
    return HAM_SUCCESS;
}
Exemplo n.º 2
0
/**
 * write a series of data chunks to storage at file offset 'addr'.
 * 
 * The chunks are assumed to be stored in sequential order, adjacent
 * to each other, i.e. as one long data strip.
 * 
 * Writing is performed on a per-page basis, where special conditions
 * will decide whether or not the write operation is performed
 * through the page cache or directly to device; such is determined 
 * on a per-page basis.
 */
static ham_status_t
__write_chunks(ham_env_t *env, ham_page_t *page, ham_offset_t addr, 
        ham_bool_t allocated, ham_bool_t freshly_created, 
        ham_u8_t **chunk_data, ham_size_t *chunk_size, 
        ham_size_t chunks)
{
    ham_size_t i;
    ham_status_t st;
    ham_offset_t pageid;
    ham_device_t *device=env_get_device(env);
	ham_size_t pagesize = env_get_pagesize(env);

    /* a freshly created page must also have been allocated by the caller */
    ham_assert(freshly_created ? allocated : 1, (0));

    /*
     * for each chunk...
     */
    for (i=0; i<chunks; i++) {
        /* loop until the current chunk is fully consumed; each iteration
         * writes at most up to the next page boundary */
        while (chunk_size[i]) {
            /*
             * get the page-ID from this chunk
             * (page-aligned base address of the page containing 'addr')
             */
            pageid = addr - (addr % pagesize);

            /*
             * is this the current page?
             */
            if (page && page_get_self(page)!=pageid)
                page=0;

            /*
             * fetch the page from the cache, if it's in the cache
             * (unless we're logging - in this case always go through
             * the buffered routines)
             */
            if (!page) {
                /*
                 * keep pages in cache when they are located at the 'edges' of 
                 * the blob, as they MAY be accessed for different data.
                 * Of course, when a blob is small, there's only one (partial) 
                 * page accessed anyhow, so that one should end up in cache 
                 * then.
                 *
                 * When transaction logging is turned on, it's the same story, 
                 * really. We _could_ keep all those pages in cache now,
                 * but this would be thrashing the cache with blob data that's 
                 * accessed once only and for transaction abort (or commit)
                 * the amount of effort does not change.
                 *
                 * THOUGHT:
                 *
                 * Do we actually care what was in that page, which is going 
                 * to be overwritten in its entirety, BEFORE we do this, i.e. 
                 * before the transaction? 
                 *
                 * Answer: NO (and YES in special circumstances).
                 *
                 * Elaboration: As this would have been free space before, the 
                 * actual content does not matter, so it's not required to add
                 * the FULL pages written by the blob write action here to the 
                 * transaction log: even on transaction abort, that lingering 
                 * data is marked as 'bogus'/free as it was before anyhow.
                 *
                 * And then, assuming a longer running transaction, where this 
                 * page was freed during a previous action WITHIN
                 * the transaction, well, than the transaction log should 
                 * already carry this page's previous content as instructed 
                 * by the erase operation. HOWEVER, the erase operation would 
                 * not have a particular NEED to edit this page, as an erase op 
                 * is complete by just marking this space as free in the 
                 * freelist, resulting in the freelist pages (and the btree 
                 * pages) being the only ones being edited and ending up in 
                 * the transaction log then.
                 *
                 * Which means we'll have to log the previous content of these 
                 * pages to the transaction log anyhow. UNLESS, that is, when
                 * WE allocated these pages in the first place: then there 
                 * cannot be any 'pre-transaction' state of these pages 
                 * except that of 'not existing', i.e. 'free'. In which case, 
                 * their actual content doesn't matter! (freshly_created)
                 *
                 * And what if we have recovery logging turned on, but it's 
                 * not about an active transaction here?
                 * In that case, the recovery log would only log the OLD page 
                 * content, which we've concluded is insignificant, ever. Of 
                 * course, that's assuming (again!) that we're writing to 
                 * freshly created pages, which no-one has seen before. 
                 *
                 * Just as long as we can prevent this section from thrashing 
                 * the page cache, thank you very much...
                 */
                /* 'edge' = partial page (misaligned start or short chunk),
                 * or a blob small enough to be cached anyway */
                ham_bool_t at_blob_edge = (__blob_from_cache(env, chunk_size[i])
                        || (addr % pagesize) != 0 
                        || chunk_size[i] < pagesize);
                /* interior pages can bypass the cache entirely, unless a
                 * log is active for a page we did not freshly create */
                ham_bool_t cacheonly = (!at_blob_edge 
                                    && (!env_get_log(env)
                                        || freshly_created));
				//ham_assert(db_get_txn(db) ? !!env_get_log(db) : 1, (0));

                st=env_fetch_page(&page, env, pageid, 
                        cacheonly ? DB_ONLY_FROM_CACHE : 
                        at_blob_edge ? 0 : DB_NEW_PAGE_DOES_THRASH_CACHE);
				ham_assert(st ? !page : 1, (0));
                /* blob pages don't have a page header */
                if (page)
                {
                    page_set_npers_flags(page, 
                        page_get_npers_flags(page)|PAGE_NPERS_NO_HEADER);
                    /* if this page was recently allocated by the parent
                     * function: set a flag */
                    if (cacheonly 
                            && allocated 
                            && addr==page_get_self(page) 
                            && env_get_txn(env))
                        page_set_alloc_txn_id(page, txn_get_id(env_get_txn(env)));
                }
                else if (st) {
                    return st;
                }
                /* NOTE(review): st==0 with page==0 (cache miss under
                 * DB_ONLY_FROM_CACHE) falls through to the direct-write
                 * path below — presumably intentional; confirm */
            }

            /*
             * if we have a page pointer: use it; otherwise write directly
             * to the device
             */
            if (page) {
                /* offset of 'addr' within this page */
                ham_size_t writestart=
                        (ham_size_t)(addr-page_get_self(page));
                /* clamp the write to the page end and to the chunk size */
                ham_size_t writesize =
                        (ham_size_t)(pagesize - writestart);
                if (writesize>chunk_size[i])
                    writesize=chunk_size[i];
                /* log the pre-image before modifying the page */
                if ((st=ham_log_add_page_before(page)))
                    return (st);
                memcpy(&page_get_raw_payload(page)[writestart], chunk_data[i],
                            writesize);
                page_set_dirty(page, env);
                addr+=writesize;
                chunk_data[i]+=writesize;
                chunk_size[i]-=writesize;
            }
            else {
                ham_size_t s = chunk_size[i];
                /* limit to the next page boundary */
                if (s > pageid+pagesize-addr)
                    s = (ham_size_t)(pageid+pagesize-addr);

                /* direct device writes bypass the log: only legal when the
                 * pages are freshly created (no pre-image to preserve) */
                ham_assert(env_get_log(env) ? freshly_created : 1, (0));

                st=device->write(device, addr, chunk_data[i], s);
                if (st)
                    return st;
                addr+=s;
                chunk_data[i]+=s;
                chunk_size[i]-=s;
            }
        }
    }

    return (0);
}
Exemplo n.º 3
0
/*
 * read 'size' bytes starting at file offset 'addr' into 'data'.
 *
 * Reading goes page by page: a cached page image is used when available,
 * otherwise the bytes are read directly from the device. If 'fpage' is
 * non-NULL it receives the last page that was used (or 0).
 */
static ham_status_t
__read_chunk(ham_env_t *env, ham_page_t *page, ham_page_t **fpage, 
        ham_offset_t addr, ham_u8_t *data, ham_size_t size)
{
    ham_status_t st;
    ham_device_t *dev=env_get_device(env);
    ham_size_t pagesize=env_get_pagesize(env);

    while (size) {
        /* page-aligned base address of the page containing 'addr' */
        ham_offset_t pageid=addr-(addr%pagesize);

        /* drop the carried-over page pointer if we crossed into
         * a different page */
        if (page && page_get_self(page)!=pageid)
            page=0;

        /*
         * no current page: try the cache; only go to disk when the
         * remaining chunk is small enough to be worth caching
         */
        if (!page) {
            st=env_fetch_page(&page, env, pageid, 
                    __blob_from_cache(env, size) ? 0 : DB_ONLY_FROM_CACHE);
            ham_assert(st ? !page : 1, (0));
            /* blob pages don't have a page header */
            if (page) {
                page_set_npers_flags(page, 
                        page_get_npers_flags(page)|PAGE_NPERS_NO_HEADER);
            }
            else if (st) {
                return st;
            }
        }

        if (page) {
            /* copy from the in-memory page image, clamped to the
             * page end and the remaining size */
            ham_size_t offset=(ham_size_t)(addr-page_get_self(page));
            ham_size_t nbytes=(ham_size_t)(pagesize-offset);
            if (nbytes>size)
                nbytes=size;
            memcpy(data, &page_get_raw_payload(page)[offset], nbytes);
            addr+=nbytes;
            data+=nbytes;
            size-=nbytes;
        }
        else {
            /* read directly from the device, at most up to the next
             * page boundary */
            ham_size_t nbytes=(size<pagesize ? size : pagesize);
            if (nbytes>pageid+pagesize-addr)
                nbytes=(ham_size_t)(pageid+pagesize-addr);

            st=dev->read(dev, addr, data, nbytes);
            if (st)
                return st;
            addr+=nbytes;
            data+=nbytes;
            size-=nbytes;
        }
    }

    /* hand the last page we touched back to the caller, if requested */
    if (fpage)
        *fpage=page;

    return (0);
}
Exemplo n.º 4
0
/**                                                                 
 * create and initialize a new backend                              
 *                                                                  
 * @remark this function is called after the @a ham_db_t structure  
 * and the file were created                                        
 *                                                                  
 * the @a flags are stored in the database; only transfer           
 * the persistent flags!                                            
 *
 * @note This is a B+-tree 'backend' method.
 */                                                                 
static ham_status_t 
my_fun_create(ham_btree_t *be, ham_u16_t keysize, ham_u32_t flags)
{
    ham_status_t st;
    ham_page_t *root;
    ham_size_t maxkeys;
    ham_db_t *db=be_get_db(be);
    db_indexdata_t *indexdata=env_get_indexdata_ptr(db_get_env(db), 
                                db_get_indexdata_offset(db));

    /* creating an already-active backend is a caller error */
    if (be_is_active(be))
    {
        ham_trace(("backend has already been initialized before!"));
        /* HAM_INTERNAL_ERROR -- not really, when keeping custom 
         * backends in mind */
        return HAM_ALREADY_INITIALIZED; 
    }

    /* 
     * prevent overflow - maxkeys only has 16 bit! 
     */
    maxkeys=btree_calc_maxkeys(env_get_pagesize(db_get_env(db)), keysize);
    if (maxkeys>MAX_KEYS_PER_NODE) {
        ham_trace(("keysize/pagesize ratio too high"));
        return HAM_INV_KEYSIZE;
    }
    else if (maxkeys==0) {
        ham_trace(("keysize too large for the current pagesize"));
        return HAM_INV_KEYSIZE;
    }

    /*
     * allocate a new root page
     */
    st=db_alloc_page(&root, db, PAGE_TYPE_B_ROOT, PAGE_IGNORE_FREELIST);
    ham_assert(st ? root == NULL : 1, (0));
    ham_assert(!st ? root != NULL : 1, (0));
    if (!root)
        return st ? st : HAM_INTERNAL_ERROR;

    /* zero out the page header and the root node header */
    memset(page_get_raw_payload(root), 0, 
            sizeof(btree_node_t)+sizeof(ham_perm_page_union_t));

    /*
     * store the runtime parameters in the backend
     */
    btree_set_maxkeys(be, (ham_u16_t)maxkeys);
    be_set_dirty(be, HAM_TRUE);
    be_set_keysize(be, keysize);
    be_set_flags(be, flags);

    btree_set_rootpage(be, page_get_self(root));

    /*
     * persist the parameters in the environment's index data
     * (the reserved fields are cleared once, after all setters -
     * the previous duplicate index_clear_reserved() call was redundant)
     */
    index_set_max_keys(indexdata, (ham_u16_t)maxkeys);
    index_set_keysize(indexdata, keysize);
    index_set_self(indexdata, page_get_self(root));
    index_set_flags(indexdata, flags);
    index_set_recno(indexdata, 0);
    index_clear_reserved(indexdata);

    env_set_dirty(db_get_env(db));

    be_set_active(be, HAM_TRUE);

    return (0);
}