gxDatabaseError BtreeCache::LRUFlush(unsigned num_to_flush)
// Flushes up to "num_to_flush" buckets to the database file, walking
// the bucket list backwards from the tail through the "prev" links so
// that the least recently used buckets are written first. The walk
// stops early if it wraps around to the tail again. Returns
// gxDBASE_NO_ERROR on success, gxDBASE_NULL_PTR if the cache has no
// file, no bucket array, or no tail bucket, and the file's error code
// if a write fails or the file is not ready for writing.
{
  if((f == 0) || (buckets == 0)) return gxDBASE_NULL_PTR;

  BtreeBucket *current = tail;
  if(!current) return gxDBASE_NULL_PTR; // The list has no tail bucket

  for(unsigned flushed = 0; flushed != num_to_flush; ++flushed) {
    if(!f->ReadyForWriting()) {
      // The file is not ready for writing
      return f->SetDatabaseError(gxDBASE_FILE_NOT_READY);
    }
    if(current->Flush(f) != gxDBASE_NO_ERROR) {
      return f->GetDatabaseError(); // Error writing to the file
    }

    // Step towards the head; arriving back at the tail means every
    // bucket in the circular list has been visited.
    current = current->prev;
    if(current == tail) break;
  }

  return gxDBASE_NO_ERROR;
}
gxDatabaseError BtreeCache::Flush()
// Flushes every bucket in the cache to the database file, starting
// at the head of the circular list and following the "next" links
// until the head is reached again. Returns gxDBASE_NO_ERROR on
// success, gxDBASE_NULL_PTR if the cache has no file, no bucket
// array, or no head bucket, and the file's error code if a write
// fails or the file is not ready for writing.
{
  if((f == 0) || (buckets == 0)) return gxDBASE_NULL_PTR;

  BtreeBucket *current = head;
  if(!current) return gxDBASE_NULL_PTR; // The list has no head bucket

  while(1) {
    if(!f->ReadyForWriting()) {
      // The file is not ready for writing
      return f->SetDatabaseError(gxDBASE_FILE_NOT_READY);
    }
    if(current->Flush(f) != gxDBASE_NO_ERROR) {
      return f->GetDatabaseError(); // Error writing to the file
    }

    current = current->next;
    if(current == head) break; // Completed one full lap of the list
  }

  return gxDBASE_NO_ERROR;
}
/* Since the last noteLocation(), our key may have moved around, and that old cached information may thus be stale and wrong (although often it is right). We check that here; if we have moved, we have to search back for where we were at. i.e., after operations on the index, the BtreeCursor's cached location info may be invalid. This function ensures validity, so you should call it before using the cursor if other writers have used the database since the last noteLocation call. */ void BtreeCursor::checkLocation() { if ( eof() ) return; if ( keyOfs >= 0 ) { BtreeBucket *b = bucket.btree(); assert( !keyAtKeyOfs.isEmpty() ); // Note keyAt() returns an empty BSONObj if keyOfs is now out of range, // which is possible as keys may have been deleted. if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) && b->k(keyOfs).recordLoc == locAtKeyOfs ) { if ( !b->k(keyOfs).isUsed() ) { /* we were deleted but still exist as an unused marker key. advance. */ skipUnusedKeys(); } return; } } /* normally we don't get to here. when we do, old position is no longer valid and we must refind where we left off (which is expensive) */ bool found; /* TODO: Switch to keep indexdetails and do idx.head! */ bucket = indexDetails.head.btree()->locate(indexDetails, indexDetails.head, keyAtKeyOfs, order, keyOfs, found, locAtKeyOfs, direction); RARELY log() << " key seems to have moved in the index, refinding. found:" << found << endl; if ( found ) skipUnusedKeys(); }
/* Builds the upper levels of the tree above a chain of buckets.

   "loc" is the first bucket of the current level; the buckets of a
   level are chained through tempNext(). For every bucket in the level
   the last key is popped and pushed into a bucket of the level above,
   with the bucket (or, if it became empty, its nextChild) recorded as
   that key's child. The process repeats on the newly built level until
   a level consists of a single bucket, which is written as the index
   head.
*/
void BtreeBuilder<V>::buildNextLevel(DiskLoc loc) {
    int levels = 1;

    for ( ;; ) {
        if ( loc.btree<V>()->tempNext().isNull() ) {
            // This level is a single bucket, so it is the root.
            getDur().writingDiskLoc(idx.head) = loc;
            break;
        }
        levels++;

        DiskLoc upLoc = BtreeBucket<V>::addBucket(idx);
        DiskLoc upStart = upLoc;
        BtreeBucket<V> *up = upLoc.btreemod<V>();

        for ( DiskLoc xloc = loc; !xloc.isNull(); ) {
            if ( getDur().commitIfNeeded() ) {
                // After a commit the modifiable bucket pointers are
                // obtained again.
                b = cur.btreemod<V>();
                up = upLoc.btreemod<V>();
            }

            BtreeBucket<V> *x = xloc.btreemod<V>();

            // Move the last key of this bucket up one level.
            Key poppedKey;
            DiskLoc poppedLoc;
            x->popBack(poppedLoc, poppedKey);

            const bool keepX = ( x->n != 0 );
            DiskLoc childLoc = keepX ? xloc : x->nextChild;

            if ( ! up->_pushBack(poppedLoc, poppedKey, ordering, childLoc) ) {
                // The current parent bucket is full: chain a new one
                // onto this level and put the key there instead.
                DiskLoc fresh = BtreeBucket<V>::addBucket(idx);
                up->setTempNext(fresh);
                upLoc = fresh;
                up = upLoc.btreemod<V>();
                up->pushBack(poppedLoc, poppedKey, ordering, childLoc);
            }

            // Read the next link before x is possibly deallocated.
            DiskLoc nextLoc = x->tempNext();

            if ( keepX ) {
                x->parent = upLoc;
            }
            else {
                // x is now empty: re-parent its remaining child, if
                // any, and release the bucket.
                if ( !x->nextChild.isNull() ) {
                    DiskLoc orphan = x->nextChild;
                    orphan.btreemod<V>()->parent = upLoc;
                }
                x->deallocBucket( xloc, idx );
            }

            xloc = nextLoc;
        }

        loc = upStart;
        mayCommitProgressDurably();
    }

    if( levels > 1 )
        log(2) << "btree levels: " << levels << endl;
}
BtreeBucket *BtreeCache::AllocBucket(FAU_t node_address, int load_bucket)
// Connects a B-tree bucket to the specified node and reads the node
// from disk when "load_bucket" equals 1. Returns a pointer to the
// bucket or a null value if an error occurred. If the node is already
// in the cache its bucket is promoted and returned without touching
// the disk.
//
// A zero "node_address" is rejected, and a failure to obtain a bucket
// sets gxDBASE_CACHE_ERROR on the file (unless an error code is
// already recorded) so that the gxBtree cached insert, delete, and
// find functions see the failure.
//
// If the load fails (no database file, or a read error) the recycled
// bucket is detached from "node_address" again before returning, so
// that a later lookup for the same address cannot be answered from a
// bucket whose contents were never read.
{
  if(node_address == (FAU_t)0) {
    // Set the database error code here to ensure that an exception
    // is properly signaled in the gxBtree cached insert, delete,
    // and find functions.
    if(f) f->SetDatabaseError(gxDBASE_CACHE_ERROR);
    return 0;
  }

  // Look for this node in the cache
  BtreeBucket *bucket = FindBucket(node_address);
  if(bucket) {
    PromoteBucket(bucket);
    cached_reads++;
    return bucket;
  }

  // This node is not in the B-tree cache
  uncached_reads++;
  bucket = FindEmptyBucket();
  if(!bucket) {
    // Could not find an empty bucket. Record a cache error unless a
    // more specific error code has already been set on the file.
    if(f) {
      if(f->GetDatabaseError() == gxDBASE_NO_ERROR) {
        f->SetDatabaseError(gxDBASE_CACHE_ERROR);
      }
    }
    return 0;
  }

  bucket->node_address = node_address;

  // node_address is known to be non-zero at this point, so only the
  // load flag needs to be tested.
  if(load_bucket == 1) {
    if((!f) || (bucket->ReadNode(f) != gxDBASE_NO_ERROR)) {
      // Not connected to a database file, or error reading the node
      // from disk. Mark the bucket as unassigned again (zero is the
      // address InitCache gives to unused buckets).
      bucket->node_address = (FAU_t)0;
      return 0;
    }
  }

  return bucket;
}
/* skip unused keys. */ void BtreeCursor::skipUnusedKeys() { int u = 0; while ( 1 ) { if ( !ok() ) break; BtreeBucket *b = bucket.btree(); _KeyNode& kn = b->k(keyOfs); if ( kn.isUsed() ) break; bucket = b->advance(bucket, keyOfs, direction, "skipUnusedKeys"); u++; } if ( u > 10 ) OCCASIONALLY log() << "btree unused skipped:" << u << '\n'; }
/* Since the last noteLocation(), our key may have moved around, and that old cached information may thus be stale and wrong (although often it is right). We check that here; if we have moved, we have to search back for where we were at. i.e., after operations on the index, the BtreeCursor's cached location info may be invalid. This function ensures validity, so you should call it before using the cursor if other writers have used the database since the last noteLocation call. */ void BtreeCursor::checkLocation() { if ( eof() ) return; multikey = d->isMultikey(idxNo); if ( keyOfs >= 0 ) { BtreeBucket *b = bucket.btree(); assert( !keyAtKeyOfs.isEmpty() ); // Note keyAt() returns an empty BSONObj if keyOfs is now out of range, // which is possible as keys may have been deleted. int x = 0; while( 1 ) { if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) && b->k(keyOfs).recordLoc == locAtKeyOfs ) { if ( !b->k(keyOfs).isUsed() ) { /* we were deleted but still exist as an unused marker key. advance. */ skipUnusedKeys( false ); } return; } /* we check one key earlier too, in case a key was just deleted. this is important so that multi updates are reasonably fast. */ if( keyOfs == 0 || x++ ) break; keyOfs--; } } /* normally we don't get to here. when we do, old position is no longer valid and we must refind where we left off (which is expensive) */ bool found; /* TODO: Switch to keep indexdetails and do idx.head! */ bucket = indexDetails.head.btree()->locate(indexDetails, indexDetails.head, keyAtKeyOfs, _ordering, keyOfs, found, locAtKeyOfs, direction); RARELY log() << " key seems to have moved in the index, refinding. found:" << found << endl; if ( ! bucket.isNull() ) skipUnusedKeys( false ); }
int BucketBasics::fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount) { { bool f = false; assert( f = true ); massert( 10281 , "assert is misdefined", f); } killCurrentOp.checkForInterrupt(); assertValid(order, true); // if( bt_fv==0 ) // return; if ( bt_dmp ) { out() << thisLoc.toString() << ' '; ((BtreeBucket *) this)->dump(); } // keycount int kc = 0; for ( int i = 0; i < n; i++ ) { _KeyNode& kn = k(i); if ( kn.isUsed() ) { kc++; } else { if ( unusedCount ) { ++( *unusedCount ); } } if ( !kn.prevChildBucket.isNull() ) { DiskLoc left = kn.prevChildBucket; BtreeBucket *b = left.btree(); wassert( b->parent == thisLoc ); kc += b->fullValidate(kn.prevChildBucket, order, unusedCount); } } if ( !nextChild.isNull() ) { BtreeBucket *b = nextChild.btree(); wassert( b->parent == thisLoc ); kc += b->fullValidate(nextChild, order, unusedCount); } return kc; }
unsigned BtreeCache::BucketsInUse() // Returns the total number of dirty buckets. { if(IsEmpty()) return 0; BtreeBucket *bucket = head; unsigned num_dirty = 0; // PC-lint 04/20/2004: Possible use of null pointer if(!bucket) return 0; do { if(bucket->IsDirty()) num_dirty++; bucket = bucket->next; } while(bucket != head); return num_dirty; }
/* skip unused keys. */ bool BtreeCursor::skipUnusedKeys( bool mayJump ) { int u = 0; while ( 1 ) { if ( !ok() ) break; BtreeBucket *b = bucket.btree(); _KeyNode& kn = b->k(keyOfs); if ( kn.isUsed() ) break; bucket = b->advance(bucket, keyOfs, direction, "skipUnusedKeys"); u++; if ( mayJump && ( u % 10 == 0 ) ) { skipOutOfRangeKeysAndCheckEnd(); } } if ( u > 10 ) OCCASIONALLY log() << "btree unused skipped:" << u << '\n'; return u; }
BtreeBucket *BtreeCache::FindEmptyBucket()
// Internal processing function that makes a bucket available for
// reuse. The bucket at the tail of the list is always chosen: it is
// flushed to the database file (when a file is connected) and then
// promoted. Returns a pointer to the bucket, or a null value if the
// cache is empty, there is no tail bucket, the file is not ready for
// writing, or the flush fails.
{
  if(IsEmpty()) {
    // Set the database error code here to ensure that an
    // exception is properly signaled in the gxBtree
    // cached insert, delete, and find functions.
    if(f) f->SetDatabaseError(gxDBASE_CACHE_ERROR);
    return 0;
  }

  BtreeBucket *candidate = tail;
  if(!candidate) return 0; // The list has no tail bucket

  if(f) {
    if(!f->ReadyForWriting()) {
      // The file is not ready for writing
      f->SetDatabaseError(gxDBASE_FILE_NOT_READY);
      return 0;
    }
    if(candidate->Flush(f) != gxDBASE_NO_ERROR) {
      return 0; // Error writing to the file
    }
  }

  PromoteBucket(candidate);
  return candidate;
}
void BtreeCache::InitCache(BtreeSize_t dbkey_size, BtreeNodeOrder_t order)
// Internal processing function used to initialize the cache variables
// and set up a circularly linked-list of buckets using a previously
// allocated array of cache buckets.
//
// Every bucket is given a key size of "dbkey_size", a node order of
// "order" - 1, a freshly allocated key entry buffer of
// key_size * node_order bytes, and a zero node address. The head
// pointer is set to the first bucket of the array and the tail
// pointer to the last. "cache_size" is set to the total number of
// bytes used by the buckets and their key entry buffers.
//
// The prev/next links are computed with explicit wrap-around so that
// no pointer outside the bucket array is ever formed, not even
// temporarily.
{
  if(IsEmpty()) return; // No memory was allocated for the cache buckets
  if(!buckets) return;
  if(num_buckets < 1) return; // No buckets to link together

  unsigned bucket_size = sizeof(BtreeBucket);
  char *base = (char *)buckets;
  int last = num_buckets - 1; // Array index of the tail bucket

  // Organize the buckets in a circularly linked-list.
  for(int i = 0; i <= last; i++) {
    BtreeBucket *p = (BtreeBucket *)(base + (i * bucket_size));
    p->key_size = dbkey_size;
    p->node_order = order-1;
    p->key_count = (BtreeKeyCount_t)0;
    p->left_child = (FAU_t)0;
    p->key_entries = new char[(p->key_size * p->node_order)];
    p->node_address = (FAU_t)0;
    p->ResetBucket();

    // The first bucket links back to the last one and the last
    // bucket links forward to the first one.
    int before = (i == 0) ? last : (i - 1);
    int after = (i == last) ? 0 : (i + 1);
    p->prev = (BtreeBucket *)(base + (before * bucket_size));
    p->next = (BtreeBucket *)(base + (after * bucket_size));
  }

  // Set the head and tail pointers
  head = (BtreeBucket *)base;
  tail = (BtreeBucket *)(base + (last * bucket_size));

  // Calculate the total number of bytes allocated
  BtreeSize_t key_size = dbkey_size * (order-1);
  cache_size = bucket_size + key_size;
  cache_size *= num_buckets;
}
/* Builds the upper levels of the tree above a chain of buckets.

   "loc" is the first bucket of the current level; the buckets of a
   level are chained through tempNext(). For every bucket in the level
   the last key is popped and pushed into a bucket of the level above,
   with the bucket (or, if it became empty, its nextChild) recorded as
   that key's child. The process repeats on the newly built level until
   a level consists of a single bucket, which is set as the head.

   "mayInterrupt" is negated and handed to the interrupt check that
   runs once per bucket processed.
*/
void BtreeBuilder<V>::buildNextLevel(DiskLoc loc, bool mayInterrupt) {
    int levels = 1;

    for ( ;; ) {
        if ( _getBucket(loc)->tempNext().isNull() ) {
            // This level is a single bucket, so it is the root.
            _btreeState->setHead( loc );
            break;
        }
        levels++;

        DiskLoc upLoc = BtreeBucket<V>::addBucket(_btreeState);
        DiskLoc upStart = upLoc;
        BtreeBucket<V> *up = _getModifiableBucket( upLoc );

        for ( DiskLoc xloc = loc; !xloc.isNull(); ) {
            killCurrentOp.checkForInterrupt( !mayInterrupt );

            if ( getDur().commitIfNeeded() ) {
                // After a commit the modifiable bucket pointers are
                // obtained again.
                b = _getModifiableBucket( cur );
                up = _getModifiableBucket( upLoc );
            }

            BtreeBucket<V> *x = _getModifiableBucket( xloc );

            // Move the last key of this bucket up one level.
            Key poppedKey;
            DiskLoc poppedLoc;
            x->popBack(poppedLoc, poppedKey);

            const bool keepX = ( x->n != 0 );
            DiskLoc childLoc = keepX ? xloc : x->nextChild;

            if ( ! up->_pushBack(poppedLoc, poppedKey, _btreeState->ordering(), childLoc) ) {
                // The current parent bucket is full: chain a new one
                // onto this level and put the key there instead.
                DiskLoc fresh = BtreeBucket<V>::addBucket(_btreeState);
                up->setTempNext(fresh);
                upLoc = fresh;
                up = _getModifiableBucket( upLoc );
                up->pushBack(poppedLoc, poppedKey, _btreeState->ordering(), childLoc);
            }

            // Read the next link before x is possibly deallocated.
            DiskLoc nextLoc = x->tempNext();

            if ( keepX ) {
                x->parent = upLoc;
            }
            else {
                // x is now empty: re-parent its remaining child, if
                // any, and release the bucket.
                if ( !x->nextChild.isNull() ) {
                    DiskLoc orphan = x->nextChild;
                    _getModifiableBucket(orphan)->parent = upLoc;
                }
                x->deallocBucket( _btreeState, xloc );
            }

            xloc = nextLoc;
        }

        loc = upStart;
        mayCommitProgressDurably();
    }

    if( levels > 1 ) {
        LOG(2) << "btree levels: " << levels << endl;
    }
}