Beispiel #1
0
//	Advance a cursor to the next live key, moving left to right.
//
//	An unpositioned cursor (CursorNone) is first placed by btree1LeftKey.
//	Dead slots are skipped.  When the current page is exhausted the scan
//	follows the page's right link; on the page that has no right link the
//	final slot is never returned.
//
//	On success dbCursor->key / dbCursor->keyLen describe the key, the state
//	becomes CursorPosAt and DB_OK is returned.  When nothing is left the
//	state becomes CursorRightEof and DB_CURSOR_eof is returned.

DbStatus btree1NextKey (DbCursor *dbCursor, DbMap *map) {
Btree1Cursor *cursor = (Btree1Cursor *)((char *)dbCursor + dbCursor->xtra);

	//	a cursor already past the right end stays there

	if (dbCursor->state == CursorRightEof)
		return DB_CURSOR_eof;

	//	first use: position the cursor before stepping

	if (dbCursor->state == CursorNone)
		btree1LeftKey(dbCursor, map);

	for (;;) {
		bool lastPage = !cursor->page->right.bits;
		uint32_t lastSlot = cursor->page->cnt;

		//	exclude the final slot of the right-most page

		if (lastPage)
			lastSlot--;

		while (++cursor->slotIdx <= lastSlot) {
			Btree1Slot *slot = slotptr(cursor->page, cursor->slotIdx);
			uint8_t *key;

			if (slot->dead)
				continue;

			key = keyaddr(cursor->page, slot->off);
			dbCursor->key = key + keypre(key);
			dbCursor->keyLen = keylen(key);
			dbCursor->state = CursorPosAt;
			return DB_OK;
		}

		//	page exhausted: stop, or restart on the right sibling

		if (lastPage)
			break;

		cursor->page = getObj(map, cursor->page->right);
		cursor->slotIdx = 0;
	}

	dbCursor->state = CursorRightEof;
	return DB_CURSOR_eof;
}
Beispiel #2
0
//	Move a cursor back to the previous live key, moving right to left.
//
//	An unpositioned cursor (CursorNone) is first placed by btree1RightKey.
//	Dead slots are skipped.  When the slot index on the current page can
//	no longer be decremented the scan follows the page's left link and
//	restarts one past that page's last slot.
//
//	On success dbCursor->key / dbCursor->keyLen describe the key, the state
//	becomes CursorPosAt and DB_OK is returned.  When there is no left link
//	to follow the state becomes CursorLeftEof and DB_CURSOR_eof is returned.

DbStatus btree1PrevKey (DbCursor *dbCursor, DbMap *map) {
Btree1Cursor *cursor = (Btree1Cursor *)((char *)dbCursor + dbCursor->xtra);

	//	a cursor already past the left end stays there

	if (dbCursor->state == CursorLeftEof)
		return DB_CURSOR_eof;

	//	first use: position the cursor before stepping

	if (dbCursor->state == CursorNone)
		btree1RightKey(dbCursor, map);

	for (;;) {

		//	step backwards over the slots of the current page

		while (cursor->slotIdx > 1) {
			Btree1Slot *slot = slotptr(cursor->page, --cursor->slotIdx);
			uint8_t *key;

			if (slot->dead)
				continue;

			key = keyaddr(cursor->page, slot->off);
			dbCursor->key = key + keypre(key);
			dbCursor->keyLen = keylen(key);
			dbCursor->state = CursorPosAt;
			return DB_OK;
		}

		//	page exhausted: stop, or restart on the left sibling

		if (!cursor->page->left.bits)
			break;

		cursor->page = getObj(map, cursor->page->left);
		cursor->slotIdx = cursor->page->cnt + 1;
	}

	dbCursor->state = CursorLeftEof;
	return DB_CURSOR_eof;
}
Beispiel #3
0
//	Locate the page at level `lvl` that should hold `key`, starting from
//	the index root and descending one level at a time.
//
//	map      index map; the root address is read from btree1index(map)
//	set      receives pageNo, page and slotIdx of the located position
//	key      search key, passed through to btree1FindSlot
//	keyLen   length of key
//	lvl      level at which to stop
//	lock     lock mode applied to pages at level `lvl`; pages at every
//	         other level are locked with Btree1_lockRead
//	stopper  passed through to btree1FindSlot
//
//	Returns DB_OK once btree1FindSlot yields a non-zero slot on a page at
//	level `lvl`; that page is still locked in `lock` mode on return.
//	Returns DB_BTREE_error when a page marked free is reached, when only
//	dead slots remain on an interior page, or when the right-sibling chain
//	ends without a match.
//
//	NOTE(review): none of the DB_BTREE_error returns unlocks the page
//	that is locked at that point -- confirm whether callers rely on this.

DbStatus btree1LoadPage(DbMap *map, Btree1Set *set, void *key, uint32_t keyLen, uint8_t lvl, Btree1Lock lock, bool stopper) {
Btree1Index *btree1 = btree1index(map);
uint8_t drill = 0xff, *ptr;		// 0xff: level of the root not yet known
Btree1Page *prevPage = NULL;
Btree1Lock mode, prevMode;		// prevMode is only read while prevPageNo.bits is non-zero
DbAddr prevPageNo;

  set->pageNo.bits = btree1->root.bits;
  prevPageNo.bits = 0;

  //  start at our idea of the root level of the btree1 and drill down

  do {
	// determine lock mode of drill level

	mode = (drill == lvl) ? lock : Btree1_lockRead; 
	set->page = getObj(map, set->pageNo);

	//	release parent or left sibling page
	//	(this happens before the lock on the new page is taken)

	if( prevPageNo.bits ) {
	  btree1UnlockPage(prevPage, prevMode);
	  prevPageNo.bits = 0;
	}

 	// obtain mode lock

	btree1LockPage(set->page, mode);

	//	a page flagged free is treated as an error

	if( set->page->free )
		return DB_BTREE_error;

	// re-read and re-lock root after determining actual level of root

	if( set->page->lvl != drill) {
		assert(drill == 0xff);
		drill = set->page->lvl;

		//	the root was locked for read above because its level was
		//	unknown; if it is itself the target level and a non-read
		//	lock was requested, drop the lock and repeat the iteration.
		//	`continue` jumps to the loop condition, and set->pageNo
		//	still holds the root address.

		if( lock != Btree1_lockRead && drill == lvl ) {
		  btree1UnlockPage(set->page, mode);
		  continue;
		}
	}

	assert(lvl <= set->page->lvl);

	//	remember this page so the next iteration can unlock it

	prevPageNo.bits = set->pageNo.bits;
	prevPage = set->page;
	prevMode = mode;

	//  find key on page at this level
	//  and descend to requested level
	//  (pages flagged kill are not searched; control falls
	//  through to the slide-right step below)

	if( !set->page->kill )
	 if( (set->slotIdx = btree1FindSlot (set->page, key, keyLen, stopper)) ) {
	  if( drill == lvl )
		return DB_OK;

	  // find next non-dead slot -- the fence key if nothing else
	  // (error out if the slot index runs past page->cnt)

	  while( slotptr(set->page, set->slotIdx)->dead )
		if( set->slotIdx++ < set->page->cnt )
		  continue;
		else
		  return DB_BTREE_error;

	  // get next page down
	  // (the child address is decoded from the slot's key bytes)

	  ptr = keyptr(set->page, set->slotIdx);
	  set->pageNo.bits = btree1GetPageNo(ptr + keypre(ptr), keylen(ptr));

	  assert(drill > 0);
	  drill--;
	  continue;
	 }

	//  or slide right into next page
	//  (reached when the page is flagged kill or FindSlot returned 0)

	set->pageNo.bits = set->page->right.bits;
  } while( set->pageNo.bits );

  // return error on end of right chain

  return DB_BTREE_error;
}
Beispiel #4
0
//	Function form of the slotptr() accessor: returns the address of
//	slot number idx on the given page.

Btree1Slot *btree1Slot(Btree1Page *page, uint32_t idx)
{
	Btree1Slot *slot = slotptr(page, idx);

	return slot;
}
Beispiel #5
0
//	Make room on a page for a key of totKeyLen bytes by discarding dead
//	slots and repacking the live keys.
//
//	index      index handle; supplies the map and the free lists
//	set        page to clean (set->page); set->slotIdx is rewritten to
//	           the corresponding slot index on the cleaned page
//	totKeyLen  number of key bytes the caller wants to add
//
//	Returns DB_OK when the page has (or now has) enough room,
//	DB_BTREE_needssplit when there is too little garbage to bother with
//	or the page is still too full after cleaning, and
//	DB_ERROR_outofmemory when the temporary frame cannot be allocated.
//
//	The page is rebuilt from a temporary copy.  Every copied key except
//	the one taken from the last source slot is preceded by a dead
//	"librarian" slot.

DbStatus btree1CleanPage(Handle *index, Btree1Set *set, uint32_t totKeyLen) {
Btree1Index *btree1 = btree1index(index->map);
Btree1Slot librarian, *source, *dest;
uint32_t size = btree1->pageSize;
Btree1Page *page = set->page;
uint32_t max = page->cnt;
uint32_t len, cnt, idx;
uint32_t newSlot = max;
Btree1PageType type;
Btree1Page *frame;
uint8_t *key;
DbAddr addr;

	//	template for the dead placeholder slots

	librarian.bits = 0;
	librarian.type = Btree1_librarian;
	librarian.dead = 1;

	//	level-zero pages are larger by a factor of 2^leafXtra

	if( !page->lvl ) {
		size <<= btree1->leafXtra;
		type = Btree1_leafPage;
	} else {
		type = Btree1_interior;
	}

	//	nothing to do if one more slot plus the new key already fits

	if( page->min >= (max+1) * sizeof(Btree1Slot) + sizeof(*page) + totKeyLen )
		return DB_OK;

	//	skip cleanup and proceed directly to split
	//	if there's not enough garbage
	//	to bother with.

	if( page->garbage < size / 5 )
		return DB_BTREE_needssplit;

	//	work from a temporary copy of the page

	if( (addr.bits = allocObj(index->map, listFree(index, type), NULL, type, size, false)) )
		frame = getObj(index->map, addr);
	else
		return DB_ERROR_outofmemory;

	memcpy (frame, page, size);

	// skip page info and set rest of page to zero

	memset (page+1, 0, size - sizeof(*page));
	page->garbage = 0;
	page->act = 0;

	cnt = 0;
	idx = 0;

	source = slotptr(frame, cnt);
	dest = slotptr(page, idx);

	// clean up page first by
	// removing deleted keys

	while( source++, cnt++ < max ) {

		//	remember where the caller's slot lands on the new page

		if( cnt == set->slotIdx )
			newSlot = idx + 2;

		if( source->dead )
			continue;

		// copy the active key across
		// (keys are packed downward from the end of the page)

		key = keyaddr(frame, source->off);
		len = keylen(key) + keypre(key);
		size -= len;

		memcpy ((uint8_t *)page + size, key, len);

		// make a librarian slot
		// it refers to the same key bytes as the slot that follows
		// it, as the rebuild loops in btree1SplitPage do; without
		// this its offset would be zero, i.e. the page header.

		if (cnt < max) {
			(++dest)->bits = librarian.bits;
			dest->off = size;
			++idx;
		}

		// set up the slot

		(++dest)->bits = source->bits;
		dest->off = size;
		idx++;

		page->act++;
	}

	page->min = size;
	page->cnt = idx;

	//	update insert slot index
	//	for newly cleaned-up page

	set->slotIdx = newSlot;

	//  return temporary frame

	addSlotToFrame(index->map, listFree(index,addr.type), NULL, addr.bits);

	//	see if page has enough space now, or does it still need splitting?

	if( page->min >= (idx+1) * sizeof(Btree1Slot) + sizeof(*page) + totKeyLen )
		return DB_OK;

	return DB_BTREE_needssplit;
}
Beispiel #6
0
//	Grow the tree by one level after the root page has been split.
//
//	index    index handle; supplies the map used for allocation
//	root     the root page, currently holding the lower half of the keys
//	right    address of the page holding the upper half of the keys
//	leftKey  length-prefixed fence key for the lower half; its trailing
//	         sizeof(uint64_t) bytes are a page number that is replaced
//
//	The root's contents (everything after the first latch) are copied to
//	a newly allocated page.  The root is then rewritten to hold exactly
//	two slots: slot 1 is the lower fence key pointing at the new page,
//	slot 2 is a stopper key pointing at `right`.  The root level is
//	incremented and its write lock is released.
//
//	Returns DB_OK, or DB_ERROR_outofmemory if no page can be allocated
//	(in which case nothing has been modified or unlocked).

DbStatus btree1SplitRoot(Handle *index, Btree1Set *root, DbAddr right, uint8_t *leftKey) {
Btree1Index *btree1 = btree1index(index->map);
Btree1Page *rootPage = root->page;
uint32_t tail = btree1->pageSize;	// key bytes are carved downward from here
Btree1Page *lowerPage, *upperPage;
uint32_t fenceLen, prefix;
Btree1Slot *entry;
uint8_t *dst;
DbAddr left;

	//	obtain an empty page to take over the current root contents

	left.bits = btree1NewPage(index, rootPage->lvl);

	if( !left.bits )
		return DB_ERROR_outofmemory;

	lowerPage = getObj(index->map, left);

	//	copy everything past the first latch, so the new page's
	//	own first latch is left as allocated

	memcpy (lowerPage->latch + 1, rootPage->latch + 1, btree1->pageSize - sizeof(*lowerPage->latch));

	//	the upper-half page now has the new page as its left sibling

	upperPage = getObj(index->map, right);
	upperPage->left.bits = left.bits;

	//	keep the root's page header, wipe the rest

	memset(rootPage + 1, 0, btree1->pageSize - sizeof(*rootPage));

	//	slot 2: stopper key whose only content is the
	//	page number of the upper-half page

	tail -= 1 + sizeof(uint64_t);

	entry = slotptr(rootPage, 2);
	entry->type = Btree1_stopper;
	entry->off = tail;

	dst = keyaddr(rootPage, tail);
	btree1PutPageNo(dst + 1, 0, right.bits);
	dst[0] = sizeof(uint64_t);

	//	slot 1: lower fence key, stored just below the stopper

	fenceLen = keylen(leftKey);
	prefix = keypre(leftKey);
	tail -= fenceLen + prefix;

	entry = slotptr(rootPage, 1);
	entry->type = Btree1_indexed;
	entry->off = tail;

	//	key bytes without the old page number, then the new page number

	dst = keyaddr(rootPage, tail);
	memcpy (dst + prefix, leftKey + prefix, fenceLen - sizeof(uint64_t));
	btree1PutPageNo(dst + prefix, fenceLen - sizeof(uint64_t), left.bits);

	//	one- or two-byte length prefix

	if (prefix == 1) {
		dst[0] = fenceLen;
	} else {
		dst[0] = fenceLen / 256 | 0x80;
		dst[1] = fenceLen;
	}

	//	the root now has two entries and sits one level higher

	rootPage->right.bits = 0;
	rootPage->min = tail;
	rootPage->cnt = 2;
	rootPage->act = 2;
	rootPage->lvl++;

	//	release root page

	btree1UnlockPage(rootPage, Btree1_lockWrite);
	return DB_OK;
}
Beispiel #7
0
//	Split a full page in two.
//
//	index  index handle; supplies the map and the free lists
//	set    the page to split (set->page / set->pageNo)
//
//	The upper half of the slots is moved to a newly allocated right page
//	and the lower half is repacked in place from a temporary copy.  Dead
//	slots are dropped and a dead "librarian" slot is placed in front of
//	every copied key except the last of each half.  For a non-root page
//	the fence key of the lower half is then inserted into level lvl+1 and
//	the existing fence is redirected to the new right page via
//	btree1FixKey.  For the root page the work is handed to btree1SplitRoot.
//
//	Returns DB_OK, DB_ERROR_outofmemory when a page cannot be allocated,
//	or the status returned by btree1InsertKey / btree1FixKey.
//
//	Both pages needed by the split are allocated before anything is
//	modified, so an allocation failure leaves the tree untouched and
//	does not leak the right page.
//
//	NOTE(review): when btree1InsertKey or btree1FixKey fails, the two
//	Btree1_lockParent locks taken below are not released -- confirm
//	whether callers expect that.

DbStatus btree1SplitPage (Handle *index, Btree1Set *set) {
uint8_t leftKey[Btree1_maxkey], rightKey[Btree1_maxkey];
Btree1Index *btree1 = btree1index(index->map);
uint32_t cnt = 0, idx = 0, max, nxt, off;
Btree1Slot librarian, *source, *dest;
uint32_t size = btree1->pageSize;
Btree1Page *frame, *rightPage;
uint8_t lvl = set->page->lvl;
uint32_t totLen, keyLen;
uint8_t *key = NULL;
DbAddr right, addr;
bool stopper;
DbStatus stat;

#ifdef DEBUG
	atomicAdd32(&Splits, 1);
#endif

	//	template for the dead placeholder slots

	librarian.bits = 0;
	librarian.type = Btree1_librarian;
	librarian.dead = 1;

	//	level-zero pages are larger by a factor of 2^leafXtra

	if( !set->page->lvl )
		size <<= btree1->leafXtra;

	//	get new page and write higher keys to it.

	if( (right.bits = btree1NewPage(index, lvl)) )
		rightPage = getObj(index->map, right);
	else
		return DB_ERROR_outofmemory;

	//	get the temporary frame now, while backing out is still
	//	trivial: nothing has been written or re-linked yet, so on
	//	failure only the right page has to be given back.

	if( (addr.bits = btree1NewPage(index, lvl)) )
		frame = getObj(index->map, addr);
	else {
		addSlotToFrame(index->map, listFree(index, right.type), NULL, right.bits);
		return DB_ERROR_outofmemory;
	}

	max = set->page->cnt;
	cnt = max / 2;
	nxt = size;
	idx = 0;

	source = slotptr(set->page, cnt);
	dest = slotptr(rightPage, 0);

	//	copy the live slots of the upper half, packing
	//	their keys downward from the end of the right page

	while( source++, cnt++ < max ) {
		if( source->dead )
			continue;

		key = keyaddr(set->page, source->off);
		totLen = keylen(key) + keypre(key);
		nxt -= totLen;

		memcpy (keyaddr(rightPage, nxt), key, totLen);
		rightPage->act++;

		//	add librarian slot

		if (cnt < max) {
			(++dest)->bits = librarian.bits;
			dest->off = nxt;
			idx++;
		}

		//  add actual slot

		(++dest)->bits = source->bits;
		dest->off = nxt;
		idx++;
	}

	//	remember right fence key for larger page
	//	extend right leaf fence key with
	//	the right page number on leaf page.
	//	(key and dest refer to the last slot copied above)

	stopper = dest->type == Btree1_stopper;
	keyLen = keylen(key);

	if( set->page->lvl)
		keyLen -= sizeof(uint64_t);		// strip off pageNo

	//	one-byte length prefix below 128, two bytes otherwise

	if( keyLen + sizeof(uint64_t) < 128 )
		off = 1;
	else
		off = 2;

	//	copy key and add pageNo

	memcpy (rightKey + off, key + keypre(key), keyLen);
	btree1PutPageNo(rightKey + off, keyLen, right.bits);
	keyLen += sizeof(uint64_t);

	if (off == 1)
		rightKey[0] = keyLen;
	else
		rightKey[0] = keyLen / 256 | 0x80, rightKey[1] = keyLen;

	rightPage->min = nxt;
	rightPage->cnt = idx;
	rightPage->lvl = lvl;

	// link right node

	if( set->pageNo.type != Btree1_rootPage ) {
		rightPage->right.bits = set->page->right.bits;
		rightPage->left.bits = set->pageNo.bits;

		//	on level zero the old right sibling gets a new left link

		if( !lvl && rightPage->right.bits ) {
			Btree1Page *farRight = getObj(index->map, rightPage->right);
			btree1LockPage (farRight, Btree1_lockLink);
			farRight->left.bits = right.bits;
			btree1UnlockPage (farRight, Btree1_lockLink);
		}
	}

	//	copy lower keys from temporary frame back into old page

	memcpy (frame, set->page, size);
	memset (set->page+1, 0, size - sizeof(*set->page));

	set->page->garbage = 0;
	set->page->act = 0;
	nxt = size;
	max /= 2;
	cnt = 0;
	idx = 0;

	//  ignore librarian max key

	if( slotptr(frame, max)->type == Btree1_librarian )
		max--;

	source = slotptr(frame, 0);
	dest = slotptr(set->page, 0);

#ifdef DEBUG
	key = keyaddr(frame, source[2].off);
	assert(keylen(key) > 0);
#endif
	//  assemble page of smaller keys from temporary frame copy

	while( source++, cnt++ < max ) {
		if( source->dead )
			continue;

		key = keyaddr(frame, source->off);
		totLen = keylen(key) + keypre(key);
		nxt -= totLen;

		memcpy (keyaddr(set->page, nxt), key, totLen);

		//	add librarian slot, except before fence key

		if (cnt < max) {
			(++dest)->bits = librarian.bits;
			dest->off = nxt;
			idx++;
		}

		//	add actual slot

		(++dest)->bits = source->bits;
		dest->off = nxt;
		idx++;

		set->page->act++;
	}

	set->page->right.bits = right.bits;
	set->page->min = nxt;
	set->page->cnt = idx;

	//	remember left fence key for smaller page
	//	extend left leaf fence key with
	//	the left page number.
	//	(key still points into the temporary frame here, so
	//	the frame must not be returned before the copy below)

	keyLen = keylen(key);

	if( set->page->lvl)
		keyLen -= sizeof(uint64_t);		// strip off pageNo

	if( keyLen + sizeof(uint64_t) < 128 )
		off = 1;
	else
		off = 2;

	//	copy key and add pageNo

	memcpy (leftKey + off, key + keypre(key), keyLen);
	btree1PutPageNo(leftKey + off, keyLen, set->pageNo.bits);
	keyLen += sizeof(uint64_t);

	if (off == 1)
		leftKey[0] = keyLen;
	else
		leftKey[0] = keyLen / 256 | 0x80, leftKey[1] = keyLen;

	//  return temporary frame

	addSlotToFrame(index->map, listFree(index, addr.type), NULL, addr.bits);

	// if current page is the root page, split it

	if( set->pageNo.type == Btree1_rootPage )
		return btree1SplitRoot (index, set, right, leftKey);

	// insert new fences in their parent pages
	// (both halves get a parent lock before the write lock is dropped)

	btree1LockPage (rightPage, Btree1_lockParent);
	btree1LockPage (set->page, Btree1_lockParent);
	btree1UnlockPage (set->page, Btree1_lockWrite);

	// insert new fence for reformulated left block of smaller keys

	if( (stat = btree1InsertKey(index, leftKey + keypre(leftKey), keylen(leftKey), lvl+1, Btree1_indexed) ))
		return stat;

	// switch fence for right block of larger keys to new right page

	if( (stat = btree1FixKey(index, rightKey, lvl+1, stopper) ))
		return stat;

	btree1UnlockPage (set->page, Btree1_lockParent);
	btree1UnlockPage (rightPage, Btree1_lockParent);
	return DB_OK;
}
Beispiel #8
0
//	Open (or create) a b-tree file and build its manager object.
//
//	name      path of the index file
//	mode      (mode & 0x7fff) == BT_rw opens read/write and creates the
//	          file if needed; anything else opens read-only
//	bits      requested page size as a power of two, clamped to
//	          BT_minbits..BT_maxbits; ignored for a non-empty file,
//	          whose first page supplies the value
//	poolmax   number of buffer pool entries, must be non-zero
//	segsize   log2 of the requested number of pages per mapped segment
//	hashsize  number of entries in the hash and latch tables
//
//	A non-empty file is only read for its page size.  An empty file is
//	initialized with an allocation page, MIN_lvl pages each holding one
//	stopper key, and a zeroed page that extends the file to the end of
//	the first segment.
//
//	Returns the manager, or NULL on any failure.  Once the file is open
//	every failure goes through bt_mgrclose, and the scratch page is
//	released on all paths.

BtMgr *bt_mgr (char *name, uint mode, uint bits, uint poolmax, uint segsize, uint hashsize)
{
uint lvl, attr, cacheblk, last;
BtPage alloc;
off64_t size;
uint amt[1];
BtMgr* mgr;
BtKey key;

#ifndef unix
SYSTEM_INFO sysinfo[1];
#endif

	// determine sanity of page size and buffer pool

	if( bits > BT_maxbits )
		bits = BT_maxbits;
	else if( bits < BT_minbits )
		bits = BT_minbits;

	if( !poolmax )
		return NULL;	// must have buffer pool

#ifdef unix
	mgr = calloc (1, sizeof(BtMgr));

	if( !mgr )
		return NULL;

	switch (mode & 0x7fff)
	{
	case BT_rw:
		mgr->idx = open ((char*)name, O_RDWR | O_CREAT, 0666);
		break;

	case BT_ro:
	default:
		mgr->idx = open ((char*)name, O_RDONLY);
		break;
	}
	if( mgr->idx == -1 )
		return free(mgr), NULL;
	
	cacheblk = 4096;	// minimum mmap segment size for unix

#else
	mgr = GlobalAlloc (GMEM_FIXED|GMEM_ZEROINIT, sizeof(BtMgr));

	if( !mgr )
		return NULL;

	attr = FILE_ATTRIBUTE_NORMAL;
	switch (mode & 0x7fff)
	{
	case BT_rw:
		mgr->idx = CreateFile(name, GENERIC_READ| GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, attr, NULL);
		break;

	case BT_ro:
	default:
		mgr->idx = CreateFile(name, GENERIC_READ, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_EXISTING, attr, NULL);
		break;
	}
	if( mgr->idx == INVALID_HANDLE_VALUE )
		return GlobalFree(mgr), NULL;

	// normalize cacheblk to multiple of sysinfo->dwAllocationGranularity
	GetSystemInfo(sysinfo);
	cacheblk = sysinfo->dwAllocationGranularity;
#endif

	//	scratch page used to read the file header and to
	//	build the initial pages; released at mgrxit / mgrerr

#ifdef unix
	alloc = malloc (BT_maxpage);
	*amt = 0;

	if( !alloc )
		return bt_mgrclose (mgr), NULL;

	// read minimum page size to get root info
	// (whence 2 seeks to end of file, giving its size)

	size = lseek (mgr->idx, 0L, 2);

	if( size ) {
		if( pread(mgr->idx, alloc, BT_minpage, 0) == BT_minpage )
			bits = alloc->bits;
		else
			goto mgrerr;
	} else if( mode == BT_ro )
		goto mgrerr;
#else
	alloc = VirtualAlloc(NULL, BT_maxpage, MEM_COMMIT, PAGE_READWRITE);

	if( !alloc )
		return bt_mgrclose (mgr), NULL;

	size = GetFileSize(mgr->idx, amt);

	if( size || *amt ) {
		if( !ReadFile(mgr->idx, (char *)alloc, BT_minpage, amt, NULL) )
			goto mgrerr;
		bits = alloc->bits;
	} else if( mode == BT_ro )
		goto mgrerr;
#endif

	//	the page size may now come from the file: reject a value
	//	that cannot be shifted safely or exceeds the scratch page

	if( bits < BT_minbits || bits > BT_maxbits )
		goto mgrerr;

	mgr->page_size = 1 << bits;
	mgr->page_bits = bits;

	mgr->poolmax = poolmax;
	mgr->mode = mode;

	if( cacheblk < mgr->page_size )
		cacheblk = mgr->page_size;

	//  mask for partial memmaps

	mgr->poolmask = (cacheblk >> bits) - 1;

	//	see if requested size of pages per memmap is greater

	if( (1 << segsize) > mgr->poolmask )
		mgr->poolmask = (1 << segsize) - 1;

	mgr->seg_bits = 0;

	while( (1 << mgr->seg_bits) <= mgr->poolmask )
		mgr->seg_bits++;

	mgr->hashsize = hashsize;

#ifdef unix
	mgr->pool = calloc (poolmax, sizeof(BtPool));
	mgr->hash = calloc (hashsize, sizeof(ushort));
	mgr->latch = calloc (hashsize, sizeof(BtLatch));
	mgr->pooladvise = calloc (poolmax, (mgr->poolmask + 8) / 8);

	if( !mgr->pooladvise )
		goto mgrerr;
#else
	mgr->pool = GlobalAlloc (GMEM_FIXED|GMEM_ZEROINIT, poolmax * sizeof(BtPool));
	mgr->hash = GlobalAlloc (GMEM_FIXED|GMEM_ZEROINIT, hashsize * sizeof(ushort));
	mgr->latch = GlobalAlloc (GMEM_FIXED|GMEM_ZEROINIT, hashsize * sizeof(BtLatch));
#endif

	//	a zero-sized request may legitimately yield NULL, so the
	//	hash and latch tables are only checked when hashsize != 0

	if( !mgr->pool || (hashsize && (!mgr->hash || !mgr->latch)) )
		goto mgrerr;

	//	an existing file needs no initialization

	if( size || *amt )
		goto mgrxit;

	// initializes an empty b-tree with root page and page of leaves

	memset (alloc, 0, 1 << bits);
	bt_putid(alloc->right, MIN_lvl+1);
	alloc->bits = mgr->page_bits;

	//	write() returns a signed count: compare as signed so that
	//	-1 is recognized as a failure on every platform

#ifdef unix
	if( write (mgr->idx, alloc, mgr->page_size) < (int)mgr->page_size )
		goto mgrerr;
#else
	if( !WriteFile (mgr->idx, (char *)alloc, mgr->page_size, amt, NULL) )
		goto mgrerr;

	if( *amt < mgr->page_size )
		goto mgrerr;
#endif

	memset (alloc, 0, 1 << bits);
	alloc->bits = mgr->page_bits;

	//	one page per level, highest level first,
	//	each holding a single two-byte 0xff 0xff stopper key

	for( lvl=MIN_lvl; lvl--; ) {
		slotptr(alloc, 1)->off = mgr->page_size - 3;
		bt_putid(slotptr(alloc, 1)->id, lvl ? MIN_lvl - lvl + 1 : 0);		// next(lower) page number
		key = keyptr(alloc, 1);
		key->len = 2;			// create stopper key
		key->key[0] = 0xff;
		key->key[1] = 0xff;
		alloc->min = mgr->page_size - 3;
		alloc->lvl = lvl;
		alloc->cnt = 1;
		alloc->act = 1;
#ifdef unix
		if( write (mgr->idx, alloc, mgr->page_size) < (int)mgr->page_size )
			goto mgrerr;
#else
		if( !WriteFile (mgr->idx, (char *)alloc, mgr->page_size, amt, NULL) )
			goto mgrerr;

		if( *amt < mgr->page_size )
			goto mgrerr;
#endif
	}

	// create empty page area by writing last page of first
	// segment area (other pages are zeroed by O/S)

	if( mgr->poolmask ) {
		memset(alloc, 0, mgr->page_size);
		last = mgr->poolmask;

		while( last < MIN_lvl + 1 )
			last += mgr->poolmask + 1;

#ifdef unix
		//	widen before shifting so the byte offset cannot wrap

		if( pwrite(mgr->idx, alloc, mgr->page_size, (off64_t)last << mgr->page_bits) < (int)mgr->page_size )
			goto mgrerr;
#else
		SetFilePointer (mgr->idx, last << mgr->page_bits, NULL, FILE_BEGIN);
		if( !WriteFile (mgr->idx, (char *)alloc, mgr->page_size, amt, NULL) )
			goto mgrerr;
		if( *amt < mgr->page_size )
			goto mgrerr;
#endif
	}

mgrxit:
#ifdef unix
	free (alloc);
#else
	VirtualFree (alloc, 0, MEM_RELEASE);
#endif
	return mgr;

	//	failure after the scratch page was obtained:
	//	release it, then tear the manager down

mgrerr:
#ifdef unix
	free (alloc);
#else
	VirtualFree (alloc, 0, MEM_RELEASE);
#endif
	bt_mgrclose (mgr);
	return NULL;
}