/*
 * Fetch the duplicate entry at @a position from the duplicate table
 * @a table_id and copy it into @a entry.
 *
 * Returns HAM_KEY_NOT_FOUND if @a position is out of range.
 */
ham_status_t
blob_duplicate_get(ham_env_t *env, ham_offset_t table_id,
        ham_size_t position, dupe_entry_t *entry)
{
    ham_status_t st;
    ham_page_t *page=0;
    dupe_table_t *table;
    ham_bool_t in_range;

    st=__get_duplicate_table(&table, &page, env, table_id);
    ham_assert(st ? table == NULL : 1, (0));
    ham_assert(st ? page == NULL : 1, (0));
    if (!table)
        return st ? st : HAM_INTERNAL_ERROR;

    in_range=(position<dupe_table_get_count(table));
    if (in_range)
        memcpy(entry, dupe_table_get_entry(table, position), sizeof(*entry));

    /* a table that was loaded into a private allocation (no backing page,
     * not an in-memory database) must be released again; in all other
     * cases the table lives in a cached page or in the blob itself */
    if (!(env_get_rt_flags(env)&HAM_IN_MEMORY_DB) && !page)
        allocator_free(env_get_allocator(env), table);

    return in_range ? 0 : HAM_KEY_NOT_FOUND;
}
/*
 * Return the user-data size of the blob @a blobid in @a size.
 */
ham_status_t
blob_get_datasize(ham_db_t *db, ham_offset_t blobid, ham_offset_t *size)
{
    blob_t hdr;
    ham_page_t *page;
    ham_status_t st;

    /*
     * in-memory-database: the blobid is actually a pointer to the memory
     * buffer, in which the blob is stored
     */
    if (env_get_rt_flags(db_get_env(db))&HAM_IN_MEMORY_DB) {
        blob_t *memhdr=(blob_t *)U64_TO_PTR(blobid);
        *size=blob_get_size(memhdr);
        return (0);
    }

    ham_assert(blobid%DB_CHUNKSIZE==0, ("blobid is %llu", blobid));

    /* read the blob header from storage */
    st=__read_chunk(db_get_env(db), 0, &page, blobid,
            (ham_u8_t *)&hdr, sizeof(hdr));
    if (st)
        return (st);

    ham_assert(blob_get_alloc_size(&hdr)%DB_CHUNKSIZE==0, (0));

    /* the header stores its own address - a mismatch means the id is bogus */
    if (blob_get_self(&hdr)!=blobid)
        return (HAM_BLOB_NOT_FOUND);

    *size=blob_get_size(&hdr);
    return (0);
}
/*
 * Load the duplicate table @a table_id.
 *
 * On success *table_ref points at the table. If the table fits entirely
 * into one page, *page is set to that page and *table_ref points into the
 * page's payload; otherwise *page stays 0 and *table_ref is a private
 * allocation which the caller must free (unless the environment is
 * in-memory, where *table_ref points into the blob itself).
 */
static ham_status_t
__get_duplicate_table(dupe_table_t **table_ref, ham_page_t **page,
        ham_env_t *env, ham_u64_t table_id)
{
    ham_status_t st;
    blob_t hdr;
    ham_page_t *hdrpage=0;
    dupe_table_t *table;

    *page = 0;

    if (env_get_rt_flags(env)&HAM_IN_MEMORY_DB) {
        /* in-memory: table_id is a pointer to the blob; the table starts
         * right behind the blob header */
        ham_u8_t *p=(ham_u8_t *)U64_TO_PTR(table_id);
        *table_ref = (dupe_table_t *)(p+sizeof(hdr));
        return HAM_SUCCESS;
    }

    *table_ref = 0;

    /*
     * load the blob header
     */
    st=__read_chunk(env, 0, &hdrpage, table_id, (ham_u8_t *)&hdr, sizeof(hdr));
    if (st) {
        return st;
    }

    /*
     * if the whole table is in a page (and not split between several
     * pages), just return a pointer directly in the page
     */
    if (page_get_self(hdrpage)+env_get_usable_pagesize(env)
            >= table_id+blob_get_size(&hdr)) {
        ham_u8_t *p=page_get_raw_payload(hdrpage);
        /* yes, table is in the page */
        *page=hdrpage;
        *table_ref = (dupe_table_t *)
                &p[table_id-page_get_self(hdrpage)+sizeof(hdr)];
        return HAM_SUCCESS;
    }

    /*
     * otherwise allocate memory for the table
     */
    table=allocator_alloc(env_get_allocator(env),
            (ham_size_t)blob_get_size(&hdr));
    if (!table) {
        return HAM_OUT_OF_MEMORY;
    }

    /*
     * then read the rest of the blob
     */
    st=__read_chunk(env, hdrpage, 0, table_id+sizeof(hdr),
            (ham_u8_t *)table, (ham_size_t)blob_get_size(&hdr));
    if (st) {
        /* BUGFIX: the freshly allocated table was leaked here when the
         * read failed */
        allocator_free(env_get_allocator(env), table);
        return st;
    }

    *table_ref = table;
    return HAM_SUCCESS;
}
/*
 * Open a remote environment: send a connect request to the server at
 * @a filename (a URL) and store the curl handle + the server's runtime
 * flags in @a env on success.
 *
 * @a flags and @a param are currently unused for the remote case.
 */
static ham_status_t
_remote_fun_open(ham_env_t *env, const char *filename, ham_u32_t flags,
        const ham_parameter_t *param)
{
    ham_status_t st;
    /* BUGFIX: initialize reply - it is tested below even when
     * _perform_request fails before assigning it (was uninitialized read) */
    proto_wrapper_t *request, *reply=0;
    CURL *handle=curl_easy_init();

    request=proto_init_connect_request(filename);

    st=_perform_request(env, handle, request, &reply);
    proto_delete(request);
    if (st) {
        curl_easy_cleanup(handle);
        if (reply)
            proto_delete(reply);
        return (st);
    }

    ham_assert(reply!=0, (""));
    ham_assert(proto_has_connect_reply(reply), (""));

    st=proto_connect_reply_get_status(reply);
    if (st==0) {
        /* success: the environment takes ownership of the curl handle */
        env_set_curl(env, handle);
        env_set_rt_flags(env, env_get_rt_flags(env)
                |proto_connect_reply_get_env_flags(reply));
    }
    else {
        /* BUGFIX: the curl handle was leaked when the server rejected
         * the connect request */
        curl_easy_cleanup(handle);
    }

    proto_delete(reply);

    return (st);
}
/*
 * Install the remote-protocol implementations in the environment's
 * dispatch table and mark the environment as remote.
 *
 * Returns HAM_NOT_IMPLEMENTED when the library was built without
 * remote support.
 */
ham_status_t
env_initialize_remote(ham_env_t *env)
{
#if HAM_ENABLE_REMOTE
    /* environment-level operations */
    env->_fun_create             =_remote_fun_create;
    env->_fun_open               =_remote_fun_open;
    env->_fun_close              =_remote_fun_env_close;
    env->_fun_flush              =_remote_fun_env_flush;
    env->_fun_get_parameters     =_remote_fun_env_get_parameters;
    env->_fun_get_database_names =_remote_fun_get_database_names;

    /* database management */
    env->_fun_create_db          =_remote_fun_create_db;
    env->_fun_open_db            =_remote_fun_open_db;
    env->_fun_rename_db          =_remote_fun_rename_db;
    env->_fun_erase_db           =_remote_fun_erase_db;

    /* transactions */
    env->_fun_txn_begin          =_remote_fun_txn_begin;
    env->_fun_txn_commit         =_remote_fun_txn_commit;
    env->_fun_txn_abort          =_remote_fun_txn_abort;

    env_set_rt_flags(env, env_get_rt_flags(env)|DB_IS_REMOTE);
#else
    return (HAM_NOT_IMPLEMENTED);
#endif

    return (0);
}
/*
 * Destroy the extended-key cache: release every extkey still chained in
 * the hash buckets, then release the cache structure itself.
 */
void
extkey_cache_destroy(extkey_cache_t *cache)
{
    ham_size_t bucket;
    extkey_t *entry;
    extkey_t *next;
    ham_db_t *db=extkey_cache_get_db(cache);
    ham_env_t *env=db_get_env(db);

    /* walk every hash bucket and free whatever is still chained there */
    for (bucket=0; bucket<extkey_cache_get_bucketsize(cache); bucket++) {
        entry=extkey_cache_get_bucket(cache, bucket);
        while (entry) {
#if HAM_DEBUG
            /*
             * an in-memory database must have drained its extkey-cache
             * before it is destroyed - flag leftovers in DEBUG builds
             */
            if (env_get_rt_flags(env)&HAM_IN_MEMORY_DB)
                ham_assert(!"extkey-cache is not empty!", (0));
#endif
            next=extkey_get_next(entry);
            allocator_free(env_get_allocator(env), entry);
            entry=next;
        }
    }

    allocator_free(env_get_allocator(env), cache);
}
/**
 * Remove all extended keys for the given @a page from the
 * extended key cache.
 */
static ham_status_t
my_fun_free_page_extkeys(ham_btree_t *be, ham_page_t *page, ham_u32_t flags)
{
    ham_db_t *db=be_get_db(be);

    ham_assert(page_get_owner(page) == db, (0));
    ham_assert(0 == (flags & ~DB_MOVE_TO_FREELIST), (0));

    /*
     * if this page has a header, and it's either a B-Tree root page or
     * a B-Tree index page: remove all extended keys from the cache,
     * and/or free their blobs
     */
    if (page_get_pers(page)
            && (!(page_get_npers_flags(page)&PAGE_NPERS_NO_HEADER))
            && (page_get_type(page)==PAGE_TYPE_B_ROOT
                || page_get_type(page)==PAGE_TYPE_B_INDEX)) {
        ham_size_t i;
        ham_offset_t blobid;
        int_key_t *bte;
        btree_node_t *node=ham_page_get_btree_node(page);
        extkey_cache_t *c;

        ham_assert(db, ("Must be set as page owner when this is a Btree page"));
        /* BUGFIX: was 'ham_assert(db=page_get_owner(page), ...)' - an
         * assignment instead of a comparison; it silently clobbered db in
         * debug builds and was a no-op in release builds */
        ham_assert(db==page_get_owner(page), (""));
        c=db_get_extkey_cache(db);

        for (i=0; i<btree_node_get_count(node); i++) {
            bte=btree_node_get_key(db, node, i);
            if (key_get_flags(bte)&KEY_IS_EXTENDED) {
                blobid=key_get_extended_rid(db, bte);
                if (env_get_rt_flags(db_get_env(db))&HAM_IN_MEMORY_DB) {
                    /* delete the blobid to prevent that it's freed twice */
                    *(ham_offset_t *)(key_get_key(bte)+
                        (db_get_keysize(db)-sizeof(ham_offset_t)))=0;
                }
                //(void)key_erase_record(db, bte, 0, BLOB_FREE_ALL_DUPES);
                if (c)
                    (void)extkey_cache_remove(c, blobid);
            }
        }
    }

    return (HAM_SUCCESS);
}
/*
 * Delete the blob @a blobid: for in-memory databases the backing buffer
 * is released; otherwise the blob's allocated area is handed back to the
 * freelist.
 */
ham_status_t
blob_free(ham_env_t *env, ham_db_t *db, ham_offset_t blobid, ham_u32_t flags)
{
    blob_t hdr;
    ham_status_t st;

    /*
     * in-memory-database: the blobid is actually a pointer to the memory
     * buffer, in which the blob is stored - freeing that buffer is all
     * there is to do
     */
    if (env_get_rt_flags(env)&HAM_IN_MEMORY_DB) {
        allocator_free(env_get_allocator(env), (void *)U64_TO_PTR(blobid));
        return (0);
    }

    ham_assert(blobid%DB_CHUNKSIZE==0, (0));

    /* fetch the blob header to learn the allocated size */
    st=__read_chunk(env, 0, 0, blobid, (ham_u8_t *)&hdr, sizeof(hdr));
    if (st)
        return (st);

    ham_assert(blob_get_alloc_size(&hdr)%DB_CHUNKSIZE==0, (0));

    /* sanity check: the header stores its own address */
    ham_assert(blob_get_self(&hdr)==blobid,
            ("invalid blobid %llu != %llu", blob_get_self(&hdr), blobid));
    if (blob_get_self(&hdr)!=blobid)
        return (HAM_BLOB_NOT_FOUND);

    /* hand the whole allocated area back to the freelist */
    st=freel_mark_free(env, db, blobid,
            (ham_size_t)blob_get_alloc_size(&hdr), HAM_FALSE);
    ham_assert(!st, ("unexpected error, at least not covered in the old code"));

    return st;
}
/*
 * Bump the global and per-database erase-query statistics counters.
 * In-memory databases keep no freelist statistics, so they are skipped.
 */
void
db_update_global_stats_erase_query(ham_db_t *db, ham_size_t key_size)
{
    ham_env_t *env = db_get_env(db);
    ham_runtime_statistics_globdata_t *globalstats;
    ham_runtime_statistics_opdbdata_t *opstats;

    if (env_get_rt_flags(env)&HAM_IN_MEMORY_DB)
        return;

    globalstats = env_get_global_perf_data(env);
    opstats = db_get_op_perf_data(db, HAM_OPERATION_STATS_ERASE);

#ifdef HAM_DEBUG
    {
        /* sanity-check that the key size maps into a valid freelist bucket */
        ham_u16_t bucket = ham_bitcount2bucket_index(key_size / DB_CHUNKSIZE);
        ham_assert(bucket < HAM_FREELIST_SLOT_SPREAD, (0));
        //ham_assert(device_get_freelist_cache(dev), (0));
    }
#endif

    globalstats->erase_query_count++;
    opstats->query_count++;
}
/*
 * Overwrite the blob @a old_blobid with the content of @a record.
 *
 * If the new data fits into the old allocation the blob is overwritten
 * in place (surplus space goes to the freelist); otherwise a fresh blob
 * is allocated and the old one is freed. The id of the resulting blob
 * (which may equal @a old_blobid) is stored in *new_blobid.
 */
ham_status_t
blob_overwrite(ham_env_t *env, ham_db_t *db, ham_offset_t old_blobid,
        ham_record_t *record, ham_u32_t flags, ham_offset_t *new_blobid)
{
    ham_status_t st;
    ham_size_t alloc_size;
    blob_t old_hdr;
    blob_t new_hdr;
    ham_page_t *page;

    /*
     * PARTIAL WRITE
     *
     * if offset+partial_size equals the full record size, then we won't
     * have any gaps. In this case we just write the full record and ignore
     * the partial parameters.
     */
    if (flags&HAM_PARTIAL) {
        if (record->partial_offset==0
                && record->partial_offset+record->partial_size==record->size)
            flags&=~HAM_PARTIAL;
    }

    /*
     * inmemory-databases: free the old blob,
     * allocate a new blob (but if both sizes are equal, just overwrite
     * the data)
     */
    if (env_get_rt_flags(env)&HAM_IN_MEMORY_DB) {
        blob_t *nhdr, *phdr=(blob_t *)U64_TO_PTR(old_blobid);

        if (blob_get_size(phdr)==record->size) {
            /* same size: copy in place, the pointer/id stays valid */
            ham_u8_t *p=(ham_u8_t *)phdr;
            if (flags&HAM_PARTIAL) {
                memmove(p+sizeof(blob_t)+record->partial_offset,
                        record->data, record->partial_size);
            }
            else {
                memmove(p+sizeof(blob_t), record->data, record->size);
            }
            *new_blobid=(ham_offset_t)PTR_TO_U64(phdr);
        }
        else {
            /* different size: allocate a new buffer, carry over the old
             * header flags, then release the old buffer */
            st=blob_allocate(env, db, record, flags, new_blobid);
            if (st)
                return (st);
            nhdr=(blob_t *)U64_TO_PTR(*new_blobid);
            blob_set_flags(nhdr, blob_get_flags(phdr));

            allocator_free(env_get_allocator(env), phdr);
        }

        return (HAM_SUCCESS);
    }

    ham_assert(old_blobid%DB_CHUNKSIZE==0, (0));

    /*
     * blobs are CHUNKSIZE-allocated
     */
    alloc_size=sizeof(blob_t)+record->size;
    alloc_size += DB_CHUNKSIZE - 1;
    alloc_size -= alloc_size % DB_CHUNKSIZE;

    /*
     * first, read the blob header; if the new blob fits into the
     * old blob, we overwrite the old blob (and add the remaining
     * space to the freelist, if there is any)
     */
    st=__read_chunk(env, 0, &page, old_blobid, (ham_u8_t *)&old_hdr,
            sizeof(old_hdr));
    if (st)
        return (st);

    ham_assert(blob_get_alloc_size(&old_hdr)%DB_CHUNKSIZE==0, (0));

    /*
     * sanity check: the header stores its own address
     */
    ham_verify(blob_get_self(&old_hdr)==old_blobid,
            ("invalid blobid %llu != %llu", blob_get_self(&old_hdr),
            old_blobid));
    if (blob_get_self(&old_hdr)!=old_blobid)
        return (HAM_BLOB_NOT_FOUND);

    /*
     * now compare the sizes; does the new data fit in the old allocated
     * space?
     */
    if (alloc_size<=blob_get_alloc_size(&old_hdr)) {
        ham_u8_t *chunk_data[2];
        ham_size_t chunk_size[2];

        /*
         * setup the new blob header; only shrink the allocation if the
         * surplus is large enough to be a useful freelist chunk
         */
        blob_set_self(&new_hdr, blob_get_self(&old_hdr));
        blob_set_size(&new_hdr, record->size);
        blob_set_flags(&new_hdr, blob_get_flags(&old_hdr));
        if (blob_get_alloc_size(&old_hdr)-alloc_size>SMALLEST_CHUNK_SIZE)
            blob_set_alloc_size(&new_hdr, alloc_size);
        else
            blob_set_alloc_size(&new_hdr, blob_get_alloc_size(&old_hdr));

        /*
         * PARTIAL WRITE
         *
         * if we have a gap at the beginning, then we have to write the
         * blob header and the blob data in two steps; otherwise we can
         * write both immediately
         */
        if ((flags&HAM_PARTIAL) && (record->partial_offset)) {
            chunk_data[0]=(ham_u8_t *)&new_hdr;
            chunk_size[0]=sizeof(new_hdr);
            st=__write_chunks(env, page, blob_get_self(&new_hdr),
                    HAM_FALSE, HAM_FALSE, chunk_data, chunk_size, 1);
            if (st)
                return (st);

            chunk_data[0]=record->data;
            chunk_size[0]=record->partial_size;
            st=__write_chunks(env, page,
                    blob_get_self(&new_hdr)+sizeof(new_hdr)
                        +record->partial_offset,
                    HAM_FALSE, HAM_FALSE, chunk_data, chunk_size, 1);
            if (st)
                return (st);
        }
        else {
            chunk_data[0]=(ham_u8_t *)&new_hdr;
            chunk_size[0]=sizeof(new_hdr);
            chunk_data[1]=record->data;
            chunk_size[1]=(flags&HAM_PARTIAL)
                    ? record->partial_size
                    : record->size;

            st=__write_chunks(env, page, blob_get_self(&new_hdr),
                    HAM_FALSE, HAM_FALSE, chunk_data, chunk_size, 2);
            if (st)
                return (st);
        }

        /*
         * move remaining data to the freelist
         */
        if (blob_get_alloc_size(&old_hdr)!=blob_get_alloc_size(&new_hdr)) {
            (void)freel_mark_free(env, db,
                    blob_get_self(&new_hdr)+blob_get_alloc_size(&new_hdr),
                    (ham_size_t)(blob_get_alloc_size(&old_hdr)-
                        blob_get_alloc_size(&new_hdr)), HAM_FALSE);
        }

        /*
         * the old rid is the new rid
         */
        *new_blobid=blob_get_self(&new_hdr);

        return (HAM_SUCCESS);
    }
    else {
        /*
         * when the new data is larger, allocate a fresh space for it
         * and discard the old; 'overwrite' has become (delete + insert) now.
         */
        st=blob_allocate(env, db, record, flags, new_blobid);
        if (st)
            return (st);

        (void)freel_mark_free(env, db, old_blobid,
                (ham_size_t)blob_get_alloc_size(&old_hdr), HAM_FALSE);
    }

    return (HAM_SUCCESS);
}
/*
 * Read the blob @a blobid into @a record.
 *
 * Supports HAM_PARTIAL (read only a sub-range of the blob) and, for
 * in-memory databases, HAM_DIRECT_ACCESS (return a pointer into the blob
 * instead of copying). Unless HAM_RECORD_USER_ALLOC is set the record's
 * buffer is (re-)allocated from the db's internal record buffer.
 */
ham_status_t
blob_read(ham_db_t *db, ham_offset_t blobid,
        ham_record_t *record, ham_u32_t flags)
{
    ham_status_t st;
    ham_page_t *page;
    blob_t hdr;
    ham_size_t blobsize=0;

    /*
     * in-memory-database: the blobid is actually a pointer to the memory
     * buffer, in which the blob is stored
     */
    if (env_get_rt_flags(db_get_env(db))&HAM_IN_MEMORY_DB) {
        blob_t *hdr=(blob_t *)U64_TO_PTR(blobid);
        ham_u8_t *data=(ham_u8_t *)(U64_TO_PTR(blobid))+sizeof(blob_t);

        /* when the database is closing, the header is already deleted */
        if (!hdr) {
            record->size = 0;
            return (0);
        }

        blobsize = (ham_size_t)blob_get_size(hdr);

        /* clamp the requested range to the blob size */
        if (flags&HAM_PARTIAL) {
            if (record->partial_offset>blobsize) {
                ham_trace(("partial offset is greater than the total "
                            "record size"));
                return (HAM_INV_PARAMETER);
            }
            if (record->partial_offset+record->partial_size>blobsize)
                blobsize=blobsize-record->partial_offset;
            else
                blobsize=record->partial_size;
        }

        if (!blobsize) {
            /* empty blob? */
            record->data = 0;
            record->size = 0;
        }
        else {
            ham_u8_t *d=data;
            if (flags&HAM_PARTIAL)
                d+=record->partial_offset;

            /* direct access: hand out a pointer into the blob itself */
            if ((flags&HAM_DIRECT_ACCESS)
                    && !(record->flags&HAM_RECORD_USER_ALLOC)) {
                record->size=blobsize;
                record->data=d;
            }
            else {
                /* resize buffer, if necessary */
                if (!(record->flags & HAM_RECORD_USER_ALLOC)) {
                    st=db_resize_record_allocdata(db, blobsize);
                    if (st)
                        return (st);
                    record->data = db_get_record_allocdata(db);
                }
                /* and copy the data */
                memcpy(record->data, d, blobsize);
                record->size = blobsize;
            }
        }

        return (0);
    }

    ham_assert(blobid%DB_CHUNKSIZE==0, ("blobid is %llu", blobid));

    /*
     * first step: read the blob header
     */
    st=__read_chunk(db_get_env(db), 0, &page, blobid,
            (ham_u8_t *)&hdr, sizeof(hdr));
    if (st)
        return (st);

    ham_assert(blob_get_alloc_size(&hdr)%DB_CHUNKSIZE==0, (0));

    /*
     * sanity check: the header stores its own address
     */
    if (blob_get_self(&hdr)!=blobid)
        return (HAM_BLOB_NOT_FOUND);

    blobsize = (ham_size_t)blob_get_size(&hdr);

    /* clamp the requested range to the blob size.
     * NOTE(review): the trace message says "offset+size" but only the
     * offset is checked here - same check as the in-memory branch above;
     * confirm which wording is intended */
    if (flags&HAM_PARTIAL) {
        if (record->partial_offset>blobsize) {
            ham_trace(("partial offset+size is greater than the total "
                        "record size"));
            return (HAM_INV_PARAMETER);
        }
        if (record->partial_offset+record->partial_size>blobsize)
            blobsize=blobsize-record->partial_offset;
        else
            blobsize=record->partial_size;
    }

    /*
     * empty blob?
     */
    if (!blobsize) {
        record->data = 0;
        record->size = 0;
        return (0);
    }

    /*
     * second step: resize the blob buffer
     */
    if (!(record->flags & HAM_RECORD_USER_ALLOC)) {
        st=db_resize_record_allocdata(db, blobsize);
        if (st)
            return (st);
        record->data = db_get_record_allocdata(db);
    }

    /*
     * third step: read the blob data
     */
    st=__read_chunk(db_get_env(db), page, 0,
            blobid+sizeof(blob_t)+(flags&HAM_PARTIAL
                    ? record->partial_offset
                    : 0),
            record->data, blobsize);
    if (st)
        return (st);

    record->size = blobsize;

    return (0);
}
/** * Allocate space in storage for and write the content references by 'data' * (and length 'size') to storage. * * Conditions will apply whether the data is written through cache or direct * to device. * * The content is, of course, prefixed by a BLOB header. * * Partial writes are handled in this function. */ ham_status_t blob_allocate(ham_env_t *env, ham_db_t *db, ham_record_t *record, ham_u32_t flags, ham_offset_t *blobid) { ham_status_t st; ham_page_t *page=0; ham_offset_t addr; blob_t hdr; ham_u8_t *chunk_data[2]; ham_size_t alloc_size; ham_size_t chunk_size[2]; ham_device_t *device=env_get_device(env); ham_bool_t freshly_created = HAM_FALSE; *blobid=0; /* * PARTIAL WRITE * * if offset+partial_size equals the full record size, then we won't * have any gaps. In this case we just write the full record and ignore * the partial parameters. */ if (flags&HAM_PARTIAL) { if (record->partial_offset==0 && record->partial_offset+record->partial_size==record->size) flags&=~HAM_PARTIAL; } /* * in-memory-database: the blobid is actually a pointer to the memory * buffer, in which the blob (with the blob-header) is stored */ if (env_get_rt_flags(env)&HAM_IN_MEMORY_DB) { blob_t *hdr; ham_u8_t *p=(ham_u8_t *)allocator_alloc(env_get_allocator(env), record->size+sizeof(blob_t)); if (!p) { return HAM_OUT_OF_MEMORY; } /* initialize the header */ hdr=(blob_t *)p; memset(hdr, 0, sizeof(*hdr)); blob_set_self(hdr, (ham_offset_t)PTR_TO_U64(p)); blob_set_alloc_size(hdr, record->size+sizeof(blob_t)); blob_set_size(hdr, record->size); /* do we have gaps? 
if yes, fill them with zeroes */ if (flags&HAM_PARTIAL) { ham_u8_t *s=p+sizeof(blob_t); if (record->partial_offset) memset(s, 0, record->partial_offset); memcpy(s+record->partial_offset, record->data, record->partial_size); if (record->partial_offset+record->partial_size<record->size) memset(s+record->partial_offset+record->partial_size, 0, record->size-(record->partial_offset+record->partial_size)); } else { memcpy(p+sizeof(blob_t), record->data, record->size); } *blobid=(ham_offset_t)PTR_TO_U64(p); return (0); } memset(&hdr, 0, sizeof(hdr)); /* * blobs are CHUNKSIZE-allocated */ alloc_size=sizeof(blob_t)+record->size; alloc_size += DB_CHUNKSIZE - 1; alloc_size -= alloc_size % DB_CHUNKSIZE; /* * check if we have space in the freelist */ st = freel_alloc_area(&addr, env, db, alloc_size); if (!addr) { if (st) return st; /* * if the blob is small AND if logging is disabled: load the page * through the cache */ if (__blob_from_cache(env, alloc_size)) { st = db_alloc_page(&page, db, PAGE_TYPE_BLOB, PAGE_IGNORE_FREELIST); ham_assert(st ? page == NULL : 1, (0)); ham_assert(!st ? 
page != NULL : 1, (0)); if (st) return st; /* blob pages don't have a page header */ page_set_npers_flags(page, page_get_npers_flags(page)|PAGE_NPERS_NO_HEADER); addr=page_get_self(page); /* move the remaining space to the freelist */ (void)freel_mark_free(env, db, addr+alloc_size, env_get_pagesize(env)-alloc_size, HAM_FALSE); blob_set_alloc_size(&hdr, alloc_size); } else { /* * otherwise use direct IO to allocate the space */ ham_size_t aligned=alloc_size; aligned += env_get_pagesize(env) - 1; aligned -= aligned % env_get_pagesize(env); st=device->alloc(device, aligned, &addr); if (st) return (st); /* if aligned!=size, and the remaining chunk is large enough: * move it to the freelist */ { ham_size_t diff=aligned-alloc_size; if (diff > SMALLEST_CHUNK_SIZE) { (void)freel_mark_free(env, db, addr+alloc_size, diff, HAM_FALSE); blob_set_alloc_size(&hdr, aligned-diff); } else { blob_set_alloc_size(&hdr, aligned); } } freshly_created = HAM_TRUE; } ham_assert(HAM_SUCCESS == freel_check_area_is_allocated(env, db, addr, alloc_size), (0)); } else { ham_assert(!st, (0)); blob_set_alloc_size(&hdr, alloc_size); } blob_set_size(&hdr, record->size); blob_set_self(&hdr, addr); /* * PARTIAL WRITE * * are there gaps at the beginning? If yes, then we'll fill with zeros */ if ((flags&HAM_PARTIAL) && (record->partial_offset)) { ham_u8_t *ptr; ham_size_t gapsize=record->partial_offset; ptr=allocator_calloc(env_get_allocator(env), gapsize > env_get_pagesize(env) ? 
env_get_pagesize(env) : gapsize); if (!ptr) return (HAM_OUT_OF_MEMORY); /* * first: write the header */ chunk_data[0]=(ham_u8_t *)&hdr; chunk_size[0]=sizeof(hdr); st=__write_chunks(env, page, addr, HAM_TRUE, freshly_created, chunk_data, chunk_size, 1); if (st) return (st); addr+=sizeof(hdr); /* now fill the gap; if the gap is bigger than a pagesize we'll * split the gap into smaller chunks */ while (gapsize>=env_get_pagesize(env)) { chunk_data[0]=ptr; chunk_size[0]=env_get_pagesize(env); st=__write_chunks(env, page, addr, HAM_TRUE, freshly_created, chunk_data, chunk_size, 1); if (st) break; gapsize-=env_get_pagesize(env); addr+=env_get_pagesize(env); } /* fill the remaining gap */ if (gapsize) { chunk_data[0]=ptr; chunk_size[0]=gapsize; st=__write_chunks(env, page, addr, HAM_TRUE, freshly_created, chunk_data, chunk_size, 1); if (st) return (st); addr+=gapsize; } allocator_free(env_get_allocator(env), ptr); /* now write the "real" data */ chunk_data[0]=(ham_u8_t *)record->data; chunk_size[0]=record->partial_size; st=__write_chunks(env, page, addr, HAM_TRUE, freshly_created, chunk_data, chunk_size, 1); if (st) return (st); addr+=record->partial_size; } else { /* * not writing partially: write header and data, then we're done */ chunk_data[0]=(ham_u8_t *)&hdr; chunk_size[0]=sizeof(hdr); chunk_data[1]=(ham_u8_t *)record->data; chunk_size[1]=(flags&HAM_PARTIAL) ? record->partial_size : record->size; st=__write_chunks(env, page, addr, HAM_TRUE, freshly_created, chunk_data, chunk_size, 2); if (st) return (st); addr+=sizeof(hdr)+ ((flags&HAM_PARTIAL) ? record->partial_size : record->size); } /* * store the blobid; it will be returned to the caller */ *blobid=blob_get_self(&hdr); /* * PARTIAL WRITES: * * if we have gaps at the end of the blob: just append more chunks to * fill these gaps. Since they can be pretty large we split them into * smaller chunks if necessary. 
*/ if (flags&HAM_PARTIAL) { if (record->partial_offset+record->partial_size < record->size) { ham_u8_t *ptr; ham_size_t gapsize=record->size - (record->partial_offset+record->partial_size); /* now fill the gap; if the gap is bigger than a pagesize we'll * split the gap into smaller chunks * * we split this loop in two - the outer loop will allocate the * memory buffer, thus saving some allocations */ while (gapsize>env_get_pagesize(env)) { ham_u8_t *ptr=allocator_calloc(env_get_allocator(env), env_get_pagesize(env)); if (!ptr) return (HAM_OUT_OF_MEMORY); while (gapsize>env_get_pagesize(env)) { chunk_data[0]=ptr; chunk_size[0]=env_get_pagesize(env); st=__write_chunks(env, page, addr, HAM_TRUE, freshly_created, chunk_data, chunk_size, 1); if (st) break; gapsize-=env_get_pagesize(env); addr+=env_get_pagesize(env); } allocator_free(env_get_allocator(env), ptr); if (st) return (st); } /* now write the remainder, which is less than a pagesize */ ham_assert(gapsize<env_get_pagesize(env), ("")); chunk_size[0]=gapsize; ptr=chunk_data[0]=allocator_calloc(env_get_allocator(env), gapsize); if (!ptr) return (HAM_OUT_OF_MEMORY); st=__write_chunks(env, page, addr, HAM_TRUE, freshly_created, chunk_data, chunk_size, 1); allocator_free(env_get_allocator(env), ptr); if (st) return (st); } } return (0); }
/*
 * Insert a duplicate entry into the duplicate table @a table_id (creating
 * the table if @a table_id is 0), at @a position or at a position derived
 * from @a flags (FIRST/LAST/BEFORE/AFTER/sorted/overwrite).
 *
 * The (possibly new) blobid of the table is stored in *rid; the final
 * insert position in *new_position (if non-0).
 */
ham_status_t
blob_duplicate_insert(ham_db_t *db, ham_offset_t table_id,
        ham_record_t *record, ham_size_t position, ham_u32_t flags,
        dupe_entry_t *entries, ham_size_t num_entries,
        ham_offset_t *rid, ham_size_t *new_position)
{
    ham_status_t st=0;
    dupe_table_t *table=0;
    ham_bool_t alloc_table=0;   /* true if 'table' is a private allocation */
    ham_bool_t resize=0;
    ham_page_t *page=0;
    ham_env_t *env=db_get_env(db);

    /*
     * create a new duplicate table if none existed, and insert
     * the first entry
     */
    if (!table_id) {
        ham_assert(num_entries==2, (""));
        /* allocates space for 8 (!) entries */
        table=allocator_calloc(env_get_allocator(env),
                sizeof(dupe_table_t)+7*sizeof(dupe_entry_t));
        if (!table)
            return HAM_OUT_OF_MEMORY;
        dupe_table_set_capacity(table, 8);
        dupe_table_set_count(table, 1);
        memcpy(dupe_table_get_entry(table, 0), &entries[0],
                        sizeof(entries[0]));

        /* skip the first entry */
        entries++;
        num_entries--;
        alloc_table=1;
    }
    else {
        /*
         * otherwise load the existing table
         */
        st=__get_duplicate_table(&table, &page, env, table_id);
        ham_assert(st ? table == NULL : 1, (0));
        ham_assert(st ? page == NULL : 1, (0));
        if (!table)
            return st ? st : HAM_INTERNAL_ERROR;
        if (!page && !(env_get_rt_flags(env)&HAM_IN_MEMORY_DB))
            alloc_table=1;
    }

    if (page)
        if ((st=ham_log_add_page_before(page)))
            return (st);

    ham_assert(num_entries==1, (""));

    /*
     * resize the table, if necessary
     */
    if (!(flags & HAM_OVERWRITE)
            && dupe_table_get_count(table)+1>=dupe_table_get_capacity(table)) {
        dupe_table_t *old=table;
        ham_size_t new_cap=dupe_table_get_capacity(table);

        /* grow by 8 slots while small, by a third afterwards */
        if (new_cap < 3*8)
            new_cap += 8;
        else
            new_cap += new_cap/3;

        table=allocator_calloc(env_get_allocator(env), sizeof(dupe_table_t)+
                        (new_cap-1)*sizeof(dupe_entry_t));
        if (!table)
            return (HAM_OUT_OF_MEMORY);
        dupe_table_set_capacity(table, new_cap);
        dupe_table_set_count(table, dupe_table_get_count(old));
        memcpy(dupe_table_get_entry(table, 0), dupe_table_get_entry(old, 0),
                       dupe_table_get_count(old)*sizeof(dupe_entry_t));
        if (alloc_table)
            allocator_free(env_get_allocator(env), old);

        alloc_table=1;
        resize=1;
    }

    /*
     * insert sorted, unsorted or overwrite the entry at the requested position
     */
    if (flags&HAM_OVERWRITE) {
        dupe_entry_t *e=dupe_table_get_entry(table, position);

        /* free the record blob that is being replaced, unless the record
         * is stored inline in the entry */
        if (!(dupe_entry_get_flags(e)&(KEY_BLOB_SIZE_SMALL
                                    |KEY_BLOB_SIZE_TINY
                                    |KEY_BLOB_SIZE_EMPTY))) {
            (void)blob_free(env, db, dupe_entry_get_rid(e), 0);
        }

        memcpy(dupe_table_get_entry(table, position),
                        &entries[0], sizeof(entries[0]));
    }
    else {
        if (db_get_rt_flags(db)&HAM_SORT_DUPLICATES) {
            /* BUGFIX: __get_sorted_position returns a negative status on
             * failure, but 'position' is an unsigned ham_size_t - the old
             * 'if (position<0)' check could never fire and errors were
             * silently used as insert positions. Use a signed temporary. */
            ham_s32_t sorted_pos;
            if (page)
                page_add_ref(page);
            sorted_pos=__get_sorted_position(db, table, record, flags);
            if (page)
                page_release_ref(page);
            if (sorted_pos<0) {
                /* BUGFIX: also release a privately allocated table on
                 * this error path */
                if (alloc_table)
                    allocator_free(env_get_allocator(env), table);
                return ((ham_status_t)sorted_pos);
            }
            position=(ham_size_t)sorted_pos;
        }
        else if (flags&HAM_DUPLICATE_INSERT_BEFORE) {
            /* do nothing, insert at the current position */
        }
        else if (flags&HAM_DUPLICATE_INSERT_AFTER) {
            position++;
            if (position > dupe_table_get_count(table))
                position=dupe_table_get_count(table);
        }
        else if (flags&HAM_DUPLICATE_INSERT_FIRST) {
            position=0;
        }
        else if (flags&HAM_DUPLICATE_INSERT_LAST) {
            position=dupe_table_get_count(table);
        }
        else {
            position=dupe_table_get_count(table);
        }

        /* shift the tail of the table to make room for the new entry */
        if (position != dupe_table_get_count(table)) {
            memmove(dupe_table_get_entry(table, position+1),
                dupe_table_get_entry(table, position),
                sizeof(entries[0])*(dupe_table_get_count(table)-position));
        }

        memcpy(dupe_table_get_entry(table, position),
                &entries[0], sizeof(entries[0]));

        dupe_table_set_count(table, dupe_table_get_count(table)+1);
    }

    /*
     * write the table back to disk and return the blobid of the table
     */
    if ((table_id && !page) || resize) {
        ham_record_t rec={0};
        rec.data=(ham_u8_t *)table;
        rec.size=sizeof(dupe_table_t)
                    +(dupe_table_get_capacity(table)-1)*sizeof(dupe_entry_t);
        st=blob_overwrite(env, db, table_id, &rec, 0, rid);
    }
    else if (!table_id) {
        ham_record_t rec={0};
        rec.data=(ham_u8_t *)table;
        rec.size=sizeof(dupe_table_t)
                    +(dupe_table_get_capacity(table)-1)*sizeof(dupe_entry_t);
        st=blob_allocate(env, db, &rec, 0, rid);
    }
    else if (table_id && page) {
        /* table was modified in place inside a cached page */
        page_set_dirty(page, env);
    }
    else {
        ham_assert(!"shouldn't be here", (0));
    }

    if (alloc_table)
        allocator_free(env_get_allocator(env), table);

    if (new_position)
        *new_position=position;

    return (st);
}
/*
 * Insert @a key / @a record into the btree, starting the recursive descent
 * at the root page. If the recursion reports SPLIT, a new root page is
 * allocated, the pivot element is inserted there and the backend's root
 * pointer is updated.
 */
static ham_status_t
__insert_cursor(ham_btree_t *be, ham_key_t *key, ham_record_t *record,
        ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st;
    ham_page_t *root;
    ham_db_t *db=be_get_db(be);
    ham_env_t *env = db_get_env(db);
    insert_scratchpad_t scratchpad;

    /* append/prepend shortcuts are handled by a different code path */
    ham_assert(hints->force_append == HAM_FALSE, (0));
    ham_assert(hints->force_prepend == HAM_FALSE, (0));

    /*
     * initialize the scratchpad
     */
    memset(&scratchpad, 0, sizeof(scratchpad));
    scratchpad.be=be;
    scratchpad.record=record;
    scratchpad.cursor=cursor;

    /*
     * get the root-page...
     */
    ham_assert(btree_get_rootpage(be)!=0, ("btree has no root page"));
    st=db_fetch_page(&root, db, btree_get_rootpage(be), 0);
    ham_assert(st ? root == NULL : 1, (0));
    if (st)
        return st;

    /*
     * ... and start the recursion
     */
    st=__insert_recursive(root, key, 0, &scratchpad, hints);

    /*
     * if the root page was split, we have to create a new
     * root page.
     */
    if (st==SPLIT) {
        ham_page_t *newroot;
        btree_node_t *node;

        /*
         * the root-page will be changed...
         */
        st=ham_log_add_page_before(root);
        if (st)
            return (st);

        /*
         * allocate a new root page
         */
        st=db_alloc_page(&newroot, db, PAGE_TYPE_B_ROOT, 0);
        ham_assert(st ? newroot == NULL : 1, (0));
        if (st)
            return (st);
        ham_assert(page_get_owner(newroot), (""));
        /* clear the node header */
        memset(page_get_payload(newroot), 0, sizeof(btree_node_t));

        /* the old root is demoted - reset its statistics */
        stats_page_is_nuked(db, root, HAM_TRUE);

        /*
         * insert the pivot element and the ptr_left
         */
        node=ham_page_get_btree_node(newroot);
        btree_node_set_ptr_left(node, btree_get_rootpage(be));
        st=__insert_nosplit(newroot, &scratchpad.key,
                scratchpad.rid, scratchpad.record, scratchpad.cursor, hints);
        ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
        /* don't overwrite cursor if __insert_nosplit is called again */
        scratchpad.cursor=0;
        if (st) {
            /* release the pivot key copied into the scratchpad */
            ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
            if (scratchpad.key.data)
                allocator_free(env_get_allocator(env), scratchpad.key.data);
            return (st);
        }

        /*
         * set the new root page
         *
         * !!
         * do NOT delete the old root page - it's still in use!
         *
         * also don't forget to flush the backend - otherwise the header
         * page of the database will not contain the updated information.
         * The backend is flushed when the database is closed, but if
         * recovery is enabled then the flush here is critical.
         */
        btree_set_rootpage(be, page_get_self(newroot));
        be_set_dirty(be, HAM_TRUE);
        be->_fun_flush(be);

        /*
         * As we re-purpose a page, we will reset its pagecounter
         * as well to signal its first use as the new type assigned
         * here.
         */
        if (env_get_cache(env) && (page_get_type(root)!=PAGE_TYPE_B_INDEX))
            cache_update_page_access_counter(root, env_get_cache(env), 0);

        /* the old root becomes an ordinary index page */
        page_set_type(root, PAGE_TYPE_B_INDEX);
        page_set_dirty(root, env);
        page_set_dirty(newroot, env);

        /* the root page was modified (btree_set_rootpage) - make sure that
         * it's logged */
        if (env_get_rt_flags(env)&HAM_ENABLE_RECOVERY) {
            st=txn_add_page(env_get_txn(env), env_get_header_page(env),
                    HAM_TRUE);
            if (st)
                return (st);
        }
    }

    /*
     * release the scratchpad-memory and return to caller
     */
    ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad.key.data)
        allocator_free(env_get_allocator(env), scratchpad.key.data);

    return (st);
}