/**
 * Walks from the root page down to a leaf page for `key` and returns the
 * leaf's frame while it is still fixed; the caller has to unfix it.
 *
 * On every inner page the child is taken from lookupSmallestGreaterThan(key);
 * if that lookup throws NotFoundException the page's upper pointer is used
 * instead. The child is fixed before the parent is unfixed.
 *
 * @param key        key whose leaf page is wanted
 * @param exclusive  mode handed to fixPage for every page on the path
 * @return pointer to the fixed leaf frame
 * @throws NotFoundException when an inner page offers neither a matching
 *         entry nor an upper pointer (that page is unfixed first)
 */
BufferFrame* BPlusSegment<K,V>::fixLeafFor(const K& key, const bool exclusive) const{
    BufferFrame* pageOfKey = &bm.fixPage(segmentId, root, exclusive);
    while(!isLeaf(pageOfKey->getData())){
        BPlusPage<K, PageID> page(pageOfKey->getData(), pageSize, cmp);
        PageID nextPage;
        try{
            nextPage = page.lookupSmallestGreaterThan(key).value;
        }catch(const NotFoundException&){
            // Caught by const reference: no copy of the exception object and
            // no unused named variable.
            if (!page.getUpperExists()){
                // Nothing to descend into: release the page before reporting.
                bm.unfixPage(*pageOfKey, false);
                throw NotFoundException();
            }
            nextPage = page.getUpper();
        }
        // Fix the child first, then let go of the parent.
        BufferFrame* oldBF = pageOfKey;
        pageOfKey = &bm.fixPage(segmentId, nextPage, exclusive);
        bm.unfixPage(*oldBF, false);
    }
    return pageOfKey;
}
/**
 * Returns the highest key reachable from `startFrame` by always following the
 * right-most child: the upper pointer when the page has one, otherwise the
 * value stored with the page's highest key.
 *
 * `startFrame` belongs to the caller and is never unfixed here. Every page
 * fixed during the descent is unfixed again before returning.
 *
 * NOTE(review): the declared return type is V although the value returned is
 * a K (the caller visible in this file stores it in a K) - confirm against
 * the declaration.
 *
 * @param startFrame  already fixed frame at which the descent starts
 * @return highest key of the leaf page that was reached
 */
V BPlusSegment<K, V>::findGreatestKey(BufferFrame* startFrame) const{
    BufferFrame* current = startFrame;
    bool atStart = true;

    while(!isLeaf(current->getData())){
        BPlusPage<K, PageID> innerPage(current->getData(), pageSize, cmp);
        PageID child = innerPage.getUpperExists() ? innerPage.getUpper()
                                                  : innerPage.getValueOfHighestKey();

        BufferFrame* previous = current;
        current = &bm.fixPage(segmentId, child, false);

        // The very first frame is controlled by the caller; only frames that
        // were fixed in here are released.
        if (!atStart){
            bm.unfixPage(*previous, false);
        }
        atStart = false;
    }

    BPlusPage<K, V> leafPage(current->getData(), pageSize, cmp);
    K greatest = leafPage.getHighestKey();
    if (!atStart){
        bm.unfixPage(*current, false);
    }
    return greatest;
}
/**
 * Inserts the pair (key, tid) into the tree and increments treeSize.
 *
 * The descent fixes every page exclusively and keeps at most the current page
 * and the page remembered as its parent. A page without room for one more
 * entry is split on the way down; if it has no parent yet, an empty node is
 * created above it first (createEmptyNode) and, on inner levels, the depth
 * counter is bumped to account for the extra level.
 *
 * @param key  key to insert
 * @param tid  value stored with the key
 */
inline void BTree<KeyType, KeyComparator>::insert(KeyType key, TID tid) {
    typedef InnerNode<KeyType, KeyComparator> Inner;
    typedef Leaf<KeyType, KeyComparator> LeafNode;

    size_t depth = 0;
    uint64_t nextPageId = rootPageId;
    BufferFrame *upperFrame = nullptr;
    BufferFrame *lowerFrame = nullptr;
    Inner *upperNode = nullptr;
    Inner *lowerNode = nullptr;

    // Unfixes the frame currently remembered as parent, if there is one.
    auto releaseUpper = [&]() {
        if (upperFrame != nullptr) {
            bufferManager.unfixPage(*upperFrame, true);
        }
    };

    for (; !isLeafHeight(depth); depth++) {
        // Shift the window one level down: the old parent goes, the old
        // current page becomes the parent.
        releaseUpper();
        upperFrame = lowerFrame;
        upperNode = lowerNode;
        lowerFrame = &bufferManager.fixPage(this->segmentId, nextPageId, true);
        lowerNode = reinterpret_cast<Inner *>(lowerFrame->getData());

        if (!lowerNode->hasSpaceForOneMoreEntry()) {
            if (upperNode == nullptr) {
                // No parent to take the separator: put an empty node above.
                auto freshParent = createEmptyNode(nextPageId);
                upperFrame = freshParent.first;
                upperNode = freshParent.second;
                depth++;
            }
            auto split = splitInnerNode(lowerNode, lowerFrame, nextPageId, upperNode, key);
            lowerFrame = split.first;
            lowerNode = split.second;
        }

        nextPageId = lowerNode->getNextNode(key, this->smallerComparator);
    }

    // Leaf height reached: nextPageId now names a leaf page.
    releaseUpper();
    upperFrame = lowerFrame;
    upperNode = lowerNode;
    lowerFrame = &bufferManager.fixPage(this->segmentId, nextPageId, true);
    auto leaf = reinterpret_cast<LeafNode *>(lowerFrame->getData());

    if (!leaf->hasSpaceForOneMoreEntry()) {
        if (upperNode == nullptr) {
            auto freshParent = createEmptyNode(nextPageId);
            upperFrame = freshParent.first;
            upperNode = freshParent.second;
        }
        auto split = splitLeaf(leaf, lowerFrame, nextPageId, upperNode, key);
        lowerFrame = split.first;
        leaf = split.second;
    }

    releaseUpper();
    leaf->insertDefiniteFit(key, tid, smallerComparator);
    bufferManager.unfixPage(*lowerFrame, true);
    treeSize++;
}
/**
 * Collects the values of all entries whose key lies in the closed range
 * spanned by key1 and key2 (the two bounds may be given in either order).
 *
 * The scan starts in the leaf that traverseToLeaf returns for the lower bound
 * and follows the leaves' `next` links until a key greater than the upper
 * bound is seen or the last leaf (next == uint64_t max) has been processed.
 *
 * Unlike the previous version, the scan does not give up when the start leaf
 * holds no key >= the lower bound: matching keys can still live in the
 * following leaves (for instance after keys were erased from the start leaf).
 * Entries smaller than the lower bound are skipped rather than ending the
 * lookup.
 *
 * @param key1  one bound of the range
 * @param key2  the other bound of the range
 * @return values of the matching entries in scan order
 */
std::vector<uint64_t> BTree<K, Comp>::lookupRange(K key1, K key2) {
    std::vector<uint64_t> resultSet;

    // Order the bounds.
    const bool alreadyOrdered = smaller(key1, key2);
    K leftK = alreadyOrdered ? key1 : key2;
    K rightK = alreadyOrdered ? key2 : key1;

    BufferFrame* leafFrame = traverseToLeaf(leftK, false);
    Node<K, Comp>* leaf = reinterpret_cast<Node<K, Comp>*>(leafFrame->getData());
    uint64_t pos = leaf->findKeyPos(leftK, smaller);

    while (true) {
        bool pastUpperBound = false;
        for (; pos < leaf->count; ++pos) {
            if (smaller(rightK, leaf->keyValuePairs[pos].first)) {
                pastUpperBound = true;
                break;
            }
            if (!smaller(leaf->keyValuePairs[pos].first, leftK)) {
                resultSet.push_back(leaf->keyValuePairs[pos].second);
            }
        }

        if (pastUpperBound || leaf->next == std::numeric_limits<uint64_t>::max()) {
            break;  // range exhausted or no further leaf
        }

        // Continue in the next leaf: remember its id, release this one first.
        uint64_t nextLeafPID = leaf->next;
        bufferManager.unfixPage(*leafFrame, false);
        leafFrame = &bufferManager.fixPage(nextLeafPID, false);
        leaf = reinterpret_cast<Node<K, Comp>*>(leafFrame->getData());
        pos = 0;
    }

    bufferManager.unfixPage(*leafFrame, false);
    return resultSet;
}
/**
 * Fixes the next free page exclusively, records it as the root page and
 * overwrites its contents with a Node constructed from `false`
 * (presumably "not a leaf" - confirm against Node's constructor).
 *
 * @return the new root's frame, still fixed; the caller has to unfix it
 */
BufferFrame* BTree<K, Comp>::createNewRoot() {
    BufferFrame& rootFrame = bufferManager.fixPage(nextFreePage++, true);
    rootPID = rootFrame.pageId;
    *reinterpret_cast<Node<K, Comp>*>(rootFrame.getData()) = Node<K, Comp>(false);
    return &rootFrame;
}
/**
 * Splits the full page in `frame` by moving entries into a newly allocated
 * sibling page (page id = current pageCount, which is then incremented).
 *
 * For inner pages the entries map K to PageID; if the page has an upper
 * pointer, that pointer is inserted into the sibling under the greatest key
 * found beneath `frame` and removed from the page.
 *
 * The sibling frame is unfixed (dirty) before returning; `frame` stays fixed
 * and is handed back inside the result.
 *
 * @param frame  fixed frame of the page to split; must have no space left
 * @param inner  true when the page is an inner page, false for a leaf page
 * @return SplitResult built from the sibling's page id, the sibling's highest
 *         key, the page's highest key, `true`, and `frame`
 */
SplitResult<K> BPlusSegment<K,V>::splitPage(BufferFrame &frame, const bool inner){
    //TODO: find a better way to work with the types here
    if (!inner){
        BPlusPage<K, V> fullPage(frame.getData(), pageSize, cmp);
        assert(!fullPage.hasAdditionalSpace());

        PageID newPageID = pageCount;
        BufferFrame& newFrame = bm.fixPage(segmentId, newPageID, true);
        pageCount++;

        BPlusPage<K, V> newPage(newFrame.getData(), pageSize, cmp);
        newPage.initialize();
        newPage.takeUpperFrom(fullPage);

        K newPageMax = newPage.getHighestKey();
        K fullPageMax = fullPage.getHighestKey();
        bm.unfixPage(newFrame, true);
        return SplitResult<K>(newPageID, newPageMax, fullPageMax, true, frame);
    }

    // Inner page: note the <K, PageID> instantiation instead of <K, V>.
    BPlusPage<K, PageID> fullPage(frame.getData(), pageSize, cmp);
    assert(!fullPage.hasAdditionalSpace());

    PageID newPageID = pageCount;
    BufferFrame& newFrame = bm.fixPage(segmentId, newPageID, true);
    pageCount++;

    BPlusPage<K, PageID> newPage(newFrame.getData(), pageSize, cmp);
    newPage.initialize();
    newPage.setLeaf(false);
    newPage.takeUpperFrom(fullPage);

    // The upper pointer moves into the sibling as a regular entry.
    if (fullPage.getUpperExists()){
        K upperKey = findGreatestKey(&frame);
        newPage.insert(upperKey, fullPage.getUpper());
        fullPage.setUpperNotExists();
    }

    K newPageMax = newPage.getHighestKey();
    K fullPageMax = fullPage.getHighestKey();
    bm.unfixPage(newFrame, true);
    return SplitResult<K>(newPageID, newPageMax, fullPageMax, true, frame);
}
/**
 * Removes `key` from the leaf that traverseToLeaf selects for it.
 *
 * The leaf is fixed exclusively, always unfixed as dirty, and the element
 * counter is decremented only when deleteKey reports success.
 *
 * @param key  key to remove
 * @return whatever deleteKey returned (true when an entry was deleted)
 */
bool BTree<K, Comp>::erase(K key) {
    BufferFrame& frame = *traverseToLeaf(key, true);
    Node<K, Comp>* node = reinterpret_cast<Node<K, Comp>*>(frame.getData());

    const bool removed = node->deleteKey(key, smaller);
    bufferManager.unfixPage(frame, true);

    if (!removed) {
        return false;
    }
    elements--;  // keep the tree's size in sync
    return true;
}
/**
 * Recursive lookup helper: fixes page `pageId` (shared), and either looks the
 * key up in it (leaf height) or recurses into the child chosen by
 * getNextNode. The page stays fixed until the recursive call has returned.
 *
 * @param key            key to search for
 * @param tid            receives the value when the leaf lookup succeeds
 * @param pageId         page to inspect at this level
 * @param currentHeight  height of `pageId`, as understood by isLeafHeight
 * @return result of the leaf's lookup
 */
inline bool BTree<KeyType, KeyComparator>::searchForKey(
        KeyType key, TID &tid, uint64_t pageId, size_t currentHeight) {
    BufferFrame &frame = bufferManager.fixPage(this->segmentId, pageId, false);

    bool found;
    if (!isLeafHeight(currentHeight)) {
        // Still above the leaves: descend one level.
        InnerNode<KeyType, KeyComparator> *inner =
                reinterpret_cast<InnerNode<KeyType, KeyComparator> *>(frame.getData());
        const uint64_t childId = inner->getNextNode(key, smallerComparator);
        found = searchForKey(key, tid, childId, currentHeight + 1);
    } else {
        Leaf<KeyType, KeyComparator> *leaf =
                reinterpret_cast<Leaf<KeyType, KeyComparator> *>(frame.getData());
        found = leaf->lookup(key, smallerComparator, &tid);
    }

    // The result is known, the page is no longer needed.
    bufferManager.unfixPage(frame, false);
    return found;
}
/**
 * Returns the values of all entries whose key lies between `begin` and `end`
 * (inclusive). The bounds may be passed in either order.
 *
 * Fixes over the previous version:
 *  - the filter now uses the swapped bounds and the tree's comparator, so a
 *    reversed range no longer yields an empty result;
 *  - the start leaf is read while its frame is fixed (findFrameForKey)
 *    instead of through a copy taken after the frame was unfixed;
 *  - following leaves are fixed in shared mode, since they are only read;
 *  - the scan stops at the first key greater than the upper bound instead of
 *    walking every remaining leaf (assumes entries and leaves are ordered by
 *    smallerComparator, as the positional search already does).
 *
 * @param begin  one bound of the range
 * @param end    the other bound of the range
 * @return values of the matching entries in scan order
 */
inline std::vector<TID> BTree<KeyType, KeyComparator>::lookupRange(KeyType begin, KeyType end) {
    KeyType left = begin;
    KeyType right = end;
    // Swap the borders when the given end is lower than the given begin.
    if (smallerComparator(end, begin)) {
        left = end;
        right = begin;
    }

    std::vector<TID> lookupSet;

    BufferFrame *currentFrame = findFrameForKey(left, false);
    Leaf<KeyType, KeyComparator> *leaf =
            reinterpret_cast<Leaf<KeyType, KeyComparator> *>(currentFrame->getData());
    int position = EntriesHelper::findPosition<KeyType, KeyComparator, TID>(
            leaf->entries, left, 0, leaf->header.keyCount, smallerComparator);

    bool pastRightBorder = false;
    while (true) {
        while (position < leaf->header.keyCount) {
            Entry<KeyType, TID> entry = leaf->entries[position];
            if (smallerComparator(right, entry.key)) {
                pastRightBorder = true;
                break;
            }
            if (!smallerComparator(entry.key, left)) {
                lookupSet.push_back(entry.value);
            }
            position++;
        }

        uint64_t nextLeaf = leaf->header.nextLeafPageId;
        if (pastRightBorder || nextLeaf == LeafHeader::INVALID_PAGE_ID) {
            break;  // range exhausted or end of the leaf chain
        }

        // Move on to the next leaf and restart at its first entry.
        bufferManager.unfixPage(*currentFrame, false);
        currentFrame = &bufferManager.fixPage(this->segmentId, nextLeaf, false);
        leaf = reinterpret_cast<Leaf<KeyType, KeyComparator> *>(currentFrame->getData());
        position = 0;
    }

    bufferManager.unfixPage(*currentFrame, false);
    return lookupSet;
}
/**
 * Point lookup: finds the leaf for `key`, checks whether the entry at the
 * position reported by findKeyPos is equal to `key`, and returns its value.
 *
 * @param key  key to look up
 * @return the stored value, or an empty optional when the key is not present
 */
boost::optional<uint64_t> BTree<K, Comp>::lookup(K key) {
    BufferFrame& frame = *traverseToLeaf(key, false);
    Node<K, Comp>* node = reinterpret_cast<Node<K, Comp>*>(frame.getData());

    boost::optional<uint64_t> result;
    const uint64_t slot = node->findKeyPos(key, smaller);
    if (slot < node->count && isEqual(key, node->keyValuePairs[slot].first, smaller)) {
        const uint64_t tid = node->keyValuePairs[slot].second;
        result = tid;
    }

    bufferManager.unfixPage(frame, false);
    return result;
}
// gets the requested data from file and puts a new bufferFrame into the hashmap // if required, this method replaces frames in the buffer void BufferManager::cachePageFromFile(unsigned pageId) { // lock hash table and lru queue pthread_rwlock_wrlock(&buffer_latch); pthread_rwlock_wrlock(&lruBuffer_latch); // if buffer is full, replace a frame in buffer with page from file if(lruBuffer.size() >= size) { // Search for the first lockable buffer frame BufferFrame *toBeFreed = NULL; for(vector<BufferFrame*>::iterator i = lruBuffer.begin(); i != lruBuffer.end(); ++i) { // try to lock this frame, if not possible continue with next frame in lruBuffer int lockResult = pthread_rwlock_trywrlock(&((*i)->latch)); //cout << "lockResult: " << lockResult << " for pageId " << (*i)->pageId << endl; if(lockResult == 0) { pthread_rwlock_unlock(&((*i)->latch)); // cout << (*i)->pageId << " wr_locked and unlocked\n"; toBeFreed = *i; // if we found our candidate frame, delete it from the buffers and deallocate the memory int cnt = buffer.erase(toBeFreed->pageId); // delete from hash map assert(cnt == 1); toBeFreed->writeDataToFile(); // save changes lruBuffer.erase(i); //free mmapped memory if (munmap(toBeFreed->getData(), FRAME_SIZE) == -1) { cerr << "Error un-mmapping the file"; } delete toBeFreed; // deallocate memory break; } } if(toBeFreed==NULL) { cerr<<"no candidate was found => release locks and return!!!"; pthread_rwlock_unlock(&lruBuffer_latch); pthread_rwlock_unlock(&buffer_latch); return; } } // add new frame to hash map and append to end of lruBuffer BufferFrame *bf = new BufferFrame(file, pageId); buffer[pageId] = bf; lruBuffer.push_back(bf); // release locks pthread_rwlock_unlock(&lruBuffer_latch); pthread_rwlock_unlock(&buffer_latch); }
/**
 * Descends from the root to the leaf responsible for `key`.
 *
 * Every page on the path, including the root and the leaf, is fixed with the
 * mode given in `exclusiveLeaf`. While descending, the page above the current
 * one stays fixed until the next step (or the end of the loop) releases it.
 *
 * @param key            key that steers the descent
 * @param exclusiveLeaf  mode passed to every fixPage call
 * @return the leaf's frame, still fixed; the caller has to unfix it
 */
BufferFrame* BTree<K, Comp>::traverseToLeaf(K key, bool exclusiveLeaf) {
    BufferFrame* held = nullptr;  // page above `frame` that is still fixed
    BufferFrame* frame = &bufferManager.fixPage(rootPID, exclusiveLeaf);
    Node<K, Comp>* node = reinterpret_cast<Node<K, Comp>*>(frame->getData());

    while (!node->isLeaf()) {
        if (held != nullptr) {
            bufferManager.unfixPage(*held, false);
        }
        held = frame;

        // Past the last separator the `next` pointer is followed, otherwise
        // the child stored with the separator.
        const uint64_t slot = node->findKeyPos(key, smaller);
        uint64_t childPID;
        if (slot == node->count) {
            childPID = node->next;
        } else {
            childPID = node->keyValuePairs[slot].second;
        }

        frame = &bufferManager.fixPage(childPID, exclusiveLeaf);
        node = reinterpret_cast<Node<K, Comp>*>(frame->getData());
    }

    if (held != nullptr) {
        bufferManager.unfixPage(*held, false);
    }
    return frame;
}
/**
 * Removes the record stored under `tid` from its slotted page and stores the
 * free space reported by the page in the free-space inventory (fsi).
 *
 * The frame is bound by reference so that the object passed to unfixPage is
 * the one owned by the buffer manager (the previous version unfixed a copy).
 *
 * @param tid  identifier (page, slot) of the record to remove
 * @return always true
 */
bool SPSegment::remove(TID tid){
    // TODO assert(tid.getPage() is part of this segment);
    BufferFrame& frame = bm->fixPage(tid.getPage(), true);
    SlottedPage* page = reinterpret_cast<SlottedPage*>(frame.getData());
    unsigned space = page->remove(tid.getSlot());

    // Update FSI
    this->fsi[tid.getPage()] = space;
    bm->unfixPage(frame, true);

    if (tid.tid == lastTID.tid) {
        // TODO Update lastTID
    }
    return true;
}
/**
 * Reads the record stored directly in slot `tid` without following a
 * redirection: moved and empty slots both yield an empty record.
 *
 * Fixes over the previous version: the frame is bound by reference instead of
 * being copied, and the Record is built while the page is still fixed rather
 * than from page memory that has already been released.
 * NOTE(review): this assumes Record's constructor copies the bytes - confirm.
 *
 * @param tid  identifier (page, slot) of the record
 * @return the record's content, or Record(0, nullptr) for moved/empty slots
 */
Record SPSegment::inPlaceLookup(TID tid) {
    // TODO assert(tid.getPage() is part of this segment);
    BufferFrame& frame = bm->fixPage(tid.getPage(), false);
    SlottedPage* page = reinterpret_cast<SlottedPage*>(frame.getData());
    Slot* slot = page->getSlot(tid.getSlot());

    if (slot->isMoved() || slot->isEmpty()) {
        // Nothing stored here: return an empty record.
        bm->unfixPage(frame, false);
        return Record(0, nullptr);
    }

    // Slot has content: build the record before giving the page back.
    Record result(slot->length(), slot->getRecord()->getData());
    bm->unfixPage(frame, false);
    return result;
}
/**
 * Stores `r` on a page with enough free space according to the free-space
 * inventory (searched from the highest page id downwards); if none
 * qualifies, page lastPage + 1 is initialised as a new SlottedPage.
 *
 * Fixes over the previous version:
 *  - the frame is bound by reference instead of being copied;
 *  - the page is no longer read (getMaxSlot) after it has been unfixed;
 *  - the frame is unfixed before the page-limit exception is thrown;
 *  - the page limit is computed in 64 bits regardless of the width of long.
 *
 * NOTE(review): for a freshly created page, fsi has no entry yet, so the
 * final `-=` starts from a value-initialised 0 - confirm where the initial
 * free space of a new page is supposed to be recorded.
 *
 * @param r  record to store
 * @return TID (page, slot) under which the record was stored
 * @throws const char*  "Max page number reached." when the page id limit is
 *         exceeded
 */
TID SPSegment::insert(const Record& r){
    // Find page with enough space for r
    uint64_t pageId = this->lastPage + 1;
    for (auto it = this->fsi.rbegin(); it != this->fsi.rend(); ++it) {
        if (it->second >= r.getLen()) {
            pageId = it->first;
            break;
        }
    }

    // If necessary, create new SlottedPage
    BufferFrame& frame = bm->fixPage(pageId, true);
    SlottedPage* page = reinterpret_cast<SlottedPage*>(frame.getData());
    if (pageId > this->lastPage) {
        *page = SlottedPage();
        this->lastPage++;
        if (lastPage > (static_cast<uint64_t>(1) << 48)) {
            bm->unfixPage(frame, true);
            throw "Max page number reached.";
        }
    }

    // TODO Reorder record ?
    // Write to page; everything needed from the page is read before unfixing.
    unsigned slotNum = page->insert(r);
    const bool usedNewSlot = (slotNum == page->getMaxSlot());
    bm->unfixPage(frame, true);

    // Update lastTID
    TID newTID = TID(pageId, slotNum);
    if (newTID.tid > this->lastTID.tid) {
        this->lastTID = newTID;
    }

    // Update FSI: the record itself plus a slot entry when a new slot was used
    this->fsi[newTID.getPage()] -= r.getLen() + (usedNewSlot ? sizeof(Slot) : 0);
    return newTID;
}
/**
 * Returns the record stored under `tid`, following a redirection when the
 * slot has been moved to another page.
 *
 * Fixes over the previous version: the frame is bound by reference instead of
 * being copied, and everything read from the slot (redirect target, record
 * bytes) is read while the page is still fixed.
 * NOTE(review): this assumes Record's constructor copies the bytes - confirm.
 *
 * @param tid  identifier (page, slot) of the record
 * @return the record, or Record(0, nullptr) when the slot is empty
 */
Record SPSegment::lookup(TID tid) {
    // TODO assert(tid.getPage() is part of this segment);
    BufferFrame& frame = bm->fixPage(tid.getPage(), false);
    SlottedPage* page = reinterpret_cast<SlottedPage*>(frame.getData());
    Slot* slot = page->getSlot(tid.getSlot());

    if (slot->isMoved()) {
        // Slot was indirected: remember the target, release this page, then
        // look the target up recursively.
        const TID target = TID(slot->slot);
        bm->unfixPage(frame, false);
        return this->lookup(target);
    }

    if (slot->length() == 0 && slot->offset() == 0) {
        // Slot is empty: return an empty record.
        bm->unfixPage(frame, false);
        return Record(0, nullptr);
    }

    // Slot has content: build the record before giving the page back.
    Record result(slot->length(), slot->getRecord()->getData());
    bm->unfixPage(frame, false);
    return result;
}
/**
 * Reads a slotted page from the buffer frame of `pageId` and registers the
 * deserialized page in spMap.
 *
 * The frame is bound by reference so that the object passed to unfixPage is
 * the one owned by the buffer manager (the previous version unfixed a copy).
 *
 * @param pageId: the page id
 *
 * @return rtrn: the deserialized SlottedPage, or NULL if an exception derived
 *         from `exception` was raised while deserializing
 */
SlottedPage* SPSegment::readFromFrame(uint64_t pageId) {
    SlottedPage* rtrn = NULL;
    BufferFrame& frame = bm->fixPage(pageId, false);

    try {
        // 1st step: deserialize
        rtrn = SlottedPage::getDeserialized(reinterpret_cast<char*>(frame.getData()));

        // 2nd step: update sp-map
        spMap[pageId] = rtrn;
    } catch (const exception& e) {
        cerr << "An exception occurred while reading slotted page from frame: " << e.what() << endl;
        rtrn = NULL;
    }

    bm->unfixPage(frame, false);
    return rtrn;
}
/**
 * Serializes a slotted page and copies one page size worth of bytes into the
 * buffer frame of `pageId`. The frame is unfixed as dirty only when the write
 * succeeded.
 *
 * The frame is bound by reference so that the object passed to unfixPage is
 * the one owned by the buffer manager (the previous version unfixed a copy).
 *
 * NOTE(review): who owns the buffer returned by getSerialized() is not
 * visible here - confirm whether it has to be released after the copy.
 *
 * @param sp: the slotted page
 * @param pageId: the page id
 *
 * @return rtrn: whether successfully or not
 */
bool SPSegment::writeToFrame(SlottedPage* sp, uint64_t pageId) {
    bool rtrn = true;
    BufferFrame& frame = bm->fixPage(pageId, true);

    try {
        // 1st step: serialize
        char* spSer = sp->getSerialized();

        // 2nd step: write into frame data pointer
        memcpy(frame.getData(), spSer, bm->getPageSize());
    } catch (const exception& e) {
        cerr << "An exception occurred while writing slotted page to frame: " << e.what() << endl;
        rtrn = false;
    }

    bm->unfixPage(frame, rtrn);
    return rtrn;
}
/**
 * Returns a copy of the left-most leaf, reached by following the first entry
 * of every inner node from the root for `height` levels.
 *
 * Fix over the previous version: the leaf is copied while its frame is still
 * fixed; before, the copy was taken from page memory after the frame had
 * already been unfixed.
 *
 * @return copy of the left-most leaf
 */
Leaf<KeyType, KeyComparator> BTree<KeyType, KeyComparator>::getMostLeftLeaf() {
    BufferFrame *currentFrame = &bufferManager.fixPage(this->segmentId, rootPageId, false);
    int currentDepth = 0;
    BufferFrame *parentFrame = nullptr;

    while (currentDepth != height) {
        InnerNode<KeyType, KeyComparator> *curNode =
                reinterpret_cast<InnerNode<KeyType, KeyComparator> *>(currentFrame->getData());
        if (parentFrame != nullptr) {
            bufferManager.unfixPage(*parentFrame, false);
        }

        // The first entry points to the left-most child.
        Entry<KeyType, uint64_t> entry = curNode->entries[0];
        uint64_t pageId = entry.value;

        parentFrame = currentFrame;
        currentFrame = &bufferManager.fixPage(this->segmentId, pageId, false);
        currentDepth++;
    }
    if (parentFrame != nullptr) {
        bufferManager.unfixPage(*parentFrame, false);
    }

    // Copy first, release afterwards.
    Leaf<KeyType, KeyComparator> leafCopy =
            *reinterpret_cast<Leaf<KeyType, KeyComparator> *>(currentFrame->getData());
    bufferManager.unfixPage(*currentFrame, false);
    return leafCopy;
}
/**
 * Returns a reference to the leaf that findFrameForKey selects for `key`.
 *
 * NOTE(review): the leaf's frame is unfixed before this function returns, so
 * the reference points into page memory that is no longer fixed by this call.
 * Callers that need a stable view should use findFrameForKey and unfix the
 * frame themselves.
 *
 * @param key  key whose leaf is wanted
 * @return reference to the leaf inside the frame's data
 */
Leaf<KeyType, KeyComparator> &BTree<KeyType, KeyComparator>::getLeaf(KeyType key) {
    BufferFrame &leafFrame = *findFrameForKey(key, false);
    Leaf<KeyType, KeyComparator> &leaf =
            *reinterpret_cast<Leaf<KeyType, KeyComparator> *>(leafFrame.getData());
    bufferManager.unfixPage(leafFrame, false);
    return leaf;
}
/**
 * Descends from the root to the leaf responsible for `key` and returns the
 * leaf's frame.
 *
 * Every page on the path is fixed with the mode given in `exclusive`. The
 * returned frame is still fixed and HAS TO BE UNFIXED BY THE CALLER.
 *
 * Fixes over the previous version: the child page id is kept as uint64_t
 * (it was narrowed to int, truncating large page ids) and the depth counter
 * is a size_t like the one used by the other traversals of this tree.
 *
 * @param key        key that steers the descent
 * @param exclusive  mode passed to every fixPage call
 * @return the fixed leaf frame
 */
BufferFrame *BTree<KeyType, KeyComparator>::findFrameForKey(KeyType key, bool exclusive) {
    BufferFrame *currentFrame = &bufferManager.fixPage(this->segmentId, rootPageId, exclusive);
    BufferFrame *parentFrame = nullptr;

    for (size_t currentDepth = 0; !isLeafHeight(currentDepth); currentDepth++) {
        InnerNode<KeyType, KeyComparator> *curNode =
                reinterpret_cast<InnerNode<KeyType, KeyComparator> *>(currentFrame->getData());
        if (parentFrame != nullptr) {
            bufferManager.unfixPage(*parentFrame, false);
        }

        uint64_t nextPageId = curNode->getNextNode(key, smallerComparator);
        parentFrame = currentFrame;
        currentFrame = &bufferManager.fixPage(this->segmentId, nextPageId, exclusive);
    }

    if (parentFrame != nullptr) {
        bufferManager.unfixPage(*parentFrame, false);
    }
    // frame is fixed and has to be unfixed by the caller!!
    return currentFrame;
}
/**
 * Inserts (key, tid) into the tree.
 *
 * The descent fixes every page exclusively. Any node on the path whose count
 * has reached maxNodeSize is split before it is left: the new sibling lives
 * on the next free page, the separator goes into the parent (a new root is
 * created when there is none), and the descent continues in whichever half
 * is chosen by comparing `key` with the split key. The parent is released
 * once the current node has been handled.
 *
 * @param key  key to insert
 * @param tid  value stored with the key
 * @return result of the leaf's insertKey; the element counter is incremented
 *         only when it reports success
 */
bool BTree<K, Comp>::insert(K key, uint64_t tid) {
    // latch the root
    BufferFrame* curFrame = &bufferManager.fixPage(rootPID, true);
    Node<K, Comp>* curNode = reinterpret_cast<Node<K, Comp>*>(curFrame->getData());
    BufferFrame* parFrame = NULL;

    // Splits the current node if it is full and keeps only the half that
    // `key` belongs to; the other half is unfixed as dirty.
    auto splitCurrentIfFull = [&]() {
        if (curNode->count < maxNodeSize) {
            return;
        }
        if (parFrame == NULL) {
            // Need to create a new root (parent) first
            parFrame = createNewRoot();
        }
        BufferFrame* siblingFrame = &bufferManager.fixPage(nextFreePage++, true);
        K splitKey = curNode->split(curFrame->pageId, siblingFrame, parFrame, smaller);
        if (smaller(key, splitKey)) {
            bufferManager.unfixPage(*siblingFrame, true);
        } else {
            curNode = reinterpret_cast<Node<K, Comp>*>(siblingFrame->getData());
            bufferManager.unfixPage(*curFrame, true);
            curFrame = siblingFrame;
        }
    };

    // Releases the parent frame, if one is held.
    auto releaseParent = [&]() {
        if (parFrame != NULL) {
            // TODO only mark dirty when the parent was actually updated
            bufferManager.unfixPage(*parFrame, true);
        }
    };

    while (!curNode->isLeaf()) {
        splitCurrentIfFull();  // split eagerly so inner pages stay safe
        releaseParent();
        parFrame = curFrame;

        // latch the next level
        uint64_t pos = curNode->findKeyPos(key, smaller);
        uint64_t nextPID = (pos == curNode->count) ? curNode->next
                                                   : curNode->keyValuePairs[pos].second;
        curFrame = &bufferManager.fixPage(nextPID, true);
        curNode = reinterpret_cast<Node<K, Comp>*>(curFrame->getData());
    }

    // curNode is the leaf now.
    splitCurrentIfFull();
    releaseParent();

    bool insertSuccessful = curNode->insertKey(key, tid, smaller);
    if (insertSuccessful) {
        elements++;
    }
    bufferManager.unfixPage(*curFrame, true);
    return insertSuccessful;
}
/**
 * Replaces the record stored under `tid` with `r`.
 *
 *  - New length <= old length: the slot's bytes are overwritten in place.
 *  - New length  > old length: the old record is removed; `r` is re-inserted
 *    on the same page when the freed space suffices, otherwise it is stored
 *    on another page (found via the free-space inventory or newly created)
 *    and the original slot is turned into a redirection to it.
 *
 * Fixes over the previous version:
 *  - both frames are bound by reference instead of being copied, so the
 *    frames passed to unfixPage are the ones owned by the buffer manager;
 *  - both frames are unfixed before the page-limit exception is thrown;
 *  - the page limit is computed in 64 bits regardless of the width of long.
 *
 * NOTE(review): the in-place branches copy r.getLen() bytes starting at the
 * address of the Record object itself (&r), not at its payload - confirm
 * this matches the layout SlottedPage::insert writes.
 * NOTE(review): when the record is re-inserted on the same page, the slot
 * number returned by insert() is ignored and the FSI is not adjusted -
 * confirm the record stays reachable under `tid`.
 *
 * @param tid  identifier (page, slot) of the record to replace
 * @param r    new content
 * @return always true
 * @throws const char*  "Max page number reached." when a new page would
 *         exceed the page id limit
 */
bool SPSegment::update(TID tid, const Record& r){
    Record r_old = this->lookup(tid);
    unsigned len_old = r_old.getLen();
    unsigned len_new = r.getLen();

    BufferFrame& frame = bm->fixPage(tid.getPage(), true);
    SlottedPage* page = reinterpret_cast<SlottedPage*>(frame.getData());

    if (len_new <= len_old) {
        // Same size or smaller: overwrite in place.
        memcpy(page->getSlot(tid.getSlot())->getRecord(), &r, r.getLen());
        // TODO (record has become smaller) Update freeSpace of page
        // TODO (record has become smaller) update FSI
    } else {
        // Record has become larger
        unsigned freeSpaceOnPage = page->remove(tid.getSlot());
        this->fsi[tid.getPage()] = freeSpaceOnPage;

        if (freeSpaceOnPage >= len_new) {
            // It fits on the page after removal
            page->insert(r);
        } else {
            // Even after removal it is too large: get another page
            uint64_t sndPageId = this->lastPage + 1;
            for (auto it = this->fsi.rbegin(); it != this->fsi.rend(); ++it) {
                if (it->second >= r.getLen()) {
                    sndPageId = it->first;
                    break;
                }
            }
            BufferFrame& sndFrame = bm->fixPage(sndPageId, true);

            // Create a new SlottedPage if necessary
            SlottedPage* sndPage = reinterpret_cast<SlottedPage*>(sndFrame.getData());
            if (sndPageId > this->lastPage) {
                *sndPage = SlottedPage();
                this->lastPage++;
                if (lastPage > (static_cast<uint64_t>(1) << 48)) {
                    bm->unfixPage(sndFrame, true);
                    bm->unfixPage(frame, true);
                    throw "Max page number reached.";
                }
            }

            Slot* fstSlot = page->getSlot(tid.getSlot());
            assert(fstSlot->isEmpty());

            // Insert into new page
            unsigned sndSlotNum = sndPage->insert(r);
            this->fsi[sndPageId] -= r.getLen() + (sndSlotNum == sndPage->getMaxSlot() ? sizeof(Slot) : 0);

            // Update first slot to redirect to the second page.
            *fstSlot = Slot(TID(sndPageId, sndSlotNum));
            bm->unfixPage(sndFrame, true);
        }
    }

    bm->unfixPage(frame, true);
    return true;
}