//remove an existed Entry(_entity_id) from VSTree bool VSTree::removeEntry(int _entity_id) { VNode* leafNodePtr = this->getLeafNodeByEntityID(_entity_id); if (leafNodePtr == NULL) { cerr<< "error, can not find the mapping leaf node. @VSTree::removeEntry" << endl; return false; } // seek the entry index of the leaf node. int entryIndex = -1; int childNum = leafNodePtr->getChildNum(); //cout<<"root file line: "<<this->root_file_line<<" "<<"max nid num: "<<this->max_nid_alloc<<endl; //cout<<"node num: "<<this->node_num<<" "<<"file line: "<<leafNodePtr->getFileLine()<<" "<<"child num: "<<childNum<<endl; for(int i = 0; i < childNum; i++) { if(leafNodePtr->getChildEntry(i).getEntityId() == _entity_id) { entryIndex = i; break; } } if(entryIndex == -1) { cerr << "error, can not find the entry in leaf node. @VSTree::removeEntry" << endl; return false; } //BETTER?:consider up->bopttom to deal, not find leaf and recursively if(leafNodePtr->isRoot()) { if(childNum == 1) { //the tree is empty now leafNodePtr->removeChild(entryIndex); leafNodePtr->refreshAncestorSignature(*(this->node_buffer)); this->removeNode(leafNodePtr); this->root_file_line = -1; this->height = 0; this->entry_num = 0; this->node_num = 0; } else { leafNodePtr->removeChild(entryIndex); leafNodePtr->refreshAncestorSignature(*(this->node_buffer)); } } else { if(childNum <= VNode::MIN_CHILD_NUM) { //cerr << "In VSTree::remove() -- the node is invalid" << endl; //TODO+BETTER:this may search again, too costly //VNode* fatherNodePtr = leafNodePtr->getFather(*(this->node_buffer)); ////int index = leafNodePtr->getIndexInFatherNode(*(this->node_buffer)); //int n = fatherNodePtr->getChildNum(); //for (int i = 0; i < n; ++i) //{ //if (fatherNodePtr->getChildFileLine(i) == leafNodePtr->getFileLine()) //{ //this->coalesce(fatherNodePtr, i, leafNodePtr, entryIndex); //break; //} //} //return false; this->coalesce(leafNodePtr, entryIndex); } else { leafNodePtr->removeChild(entryIndex); leafNodePtr->refreshAncestorSignature(*(this->node_buffer)); } } this->entry_num--; this->entityID2FileLineMap.erase(_entity_id); //NOTICE:insert is costly but can keep balance. //However, remove is not too costly but can not keep balance at all. //And remove maybe error if without coalesce! //If remove and insert are both wonderful, update/replace can both be balanced using delete and insert again. //(not care the balance now, if insert/delete many times, rebuilding is suggested) //we do not consider the situation which the leaf node is to be empty by now... //in a better way, if the leaf node is empty after removing entry, we should delete it. and recursively judge whether its //father is empty, and delete its father node if true. to make the VSTree more balanced, we should combine two nodes if //their child number are less than the MIN_CHILD_NUM. when deleting one node from the tree, we should also remove it from //tree node file in hard disk by doing some operations on the node_buffer. return true; }
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node) { #ifdef DEBUG_VSTREE stringstream _ss; _ss << "**********************split happen at " << _p_node_being_split->getFileLine() << endl; _ss << _p_node_being_split->to_str() << endl; Util::logging(_ss.str()); #endif // first, add the new child node(if not leaf) or child entry(if leaf) to the full node. bool just_insert_entry = (_p_insert_node == NULL); if(just_insert_entry) { _p_node_being_split->addChildEntry(_insert_entry, true); } else { _p_node_being_split->addChildNode(_p_insert_node, true); } SigEntry entryA, entryB; //BETTER: use hanming, xor result or the vector included angle to guess the distince. //And then also use the farest two as seeds. // //two seeds to generate two new nodes. //seedA kernel: the SigEntry with the minimal count of signature. //seedB kernel: the SigEntry with the maximal count of signature. int maxCount = 0; // record the minimal signature count. int entryA_index = 0; // record the seedA kernel index. for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount(); if(maxCount < currentCount) { maxCount = currentCount; entryA_index = i; } } entryA = _p_node_being_split->getChildEntry(entryA_index); maxCount = 0; int entryB_index = 0; // record the seedB kernel index. for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { //NOTICE:I think xOR should be used here to choose the farest two int currentCount = entryA.xOR(_p_node_being_split->getChildEntry(i)); //int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); if(i != entryA_index && maxCount <= currentCount) { maxCount = currentCount; entryB_index = i; } } entryB = _p_node_being_split->getChildEntry(entryB_index); // AEntryIndex: the entry index near seedA. // BEntryIndex: the entry index near seedB. std::vector<int> entryIndex_nearA, entryIndex_nearB; entryIndex_nearA.clear(); entryIndex_nearB.clear(); entryIndex_nearA.push_back(entryA_index); entryIndex_nearB.push_back(entryB_index); int nearA_max_size, nearB_max_size; bool nearA_tooSmall, nearB_tooSmall; for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { if(i == entryA_index || i == entryB_index) continue; //should guarantee that each new node has at least MIN_CHILD_NUM children. nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size(); nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM); if(nearA_tooSmall) { for(; i < VNode::MAX_CHILD_NUM; i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearA.push_back(i); } break; } nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size(); nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM); if(nearB_tooSmall) { for(; i < VNode::MAX_CHILD_NUM; i++) { if(i == entryA_index || i == entryB_index) continue; entryIndex_nearB.push_back(i); } break; } //calculate the distance from //the i-th child entry signature to seedA(or seedB). //NOTICE:we should expect that the candidate can be almost contained! //However, the precondition there are not too many 1s int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i)); // choose the near one seed to add into if(disToSeedA <= disToSeedB) { entryIndex_nearA.push_back(i); } else { entryIndex_nearB.push_back(i); } } // then create a new node to act as BEntryIndex's father. VNode* newNodePtr = this->createNode(); #ifdef DEBUG_VSTREE stringstream _ss2; _ss2 << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl; Util::logging(_ss2.str()); #endif // the old one acts as AEntryIndex's father. VNode* oldNodePtr = _p_node_being_split; // if the old node is leaf, set the new node as a leaf. if(oldNodePtr->isLeaf()) { newNodePtr->setAsLeaf(true); } //add all the entries in BEntryIndex into the new node child entry array, //and calculate the new node's entry. for(unsigned i = 0; i < entryIndex_nearB.size(); i++) { if(oldNodePtr->isLeaf()) { newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false); } else { //debug target 2 VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer)); newNodePtr->addChildNode(childPtr); } } newNodePtr->refreshSignature(); //label the child being removed with -1, //and update the old node's entry. sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>()); #ifdef DEBUG_VSTREE stringstream _ss1; { _ss1 << "nearA: "; for(unsigned i = 0; i < entryIndex_nearA.size(); i++) { _ss1 << entryIndex_nearA[i] << " "; } _ss1 << endl; _ss1 << "nearB: "; for(unsigned i = 0; i < entryIndex_nearB.size(); i++) { _ss1 << entryIndex_nearB[i] << " "; } _ss1 << endl; } Util::logging(_ss1.str()); #endif for(unsigned i = 0; i < entryIndex_nearA.size(); i++) { oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i])); oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i])); } oldNodePtr->setChildNum(entryIndex_nearA.size()); oldNodePtr->refreshSignature(); int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer)); // full node's father pointer. VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer)); if(oldNodePtr->isRoot()) { //if the old node is root, //split the root, create a new root, //and the tree height will be increased. VNode* RootNewPtr = this->createNode(); //change the old root node to not-root node, //and set the RootNew to root node. oldNodePtr->setAsRoot(false); RootNewPtr->setAsRoot(true); //set the split two node(old node and new node) as the new root's child, //and update signatures. RootNewPtr->addChildNode(oldNodePtr); RootNewPtr->addChildNode(newNodePtr); RootNewPtr->refreshSignature(); //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // Util::logging(_ss.str()); // } //should keep the root node always being //at the first line(line zero) of the tree node file. this->swapNodeFileLine(RootNewPtr, oldNodePtr); this->height++; //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl; // _ss << RootNewPtr->to_str() << endl; // Util::logging(_ss.str()); // } } else { //if the (OldNode) is not Root, //change the old node's signature to A's signature. oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry()); if(oldNodeFatherPtr->isFull()) { oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr); } else { oldNodeFatherPtr->addChildNode(newNodePtr); oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); } } //debug // if (!oldNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl; // Util::logging(_ss.str()); // } // if (!newNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl; // Util::logging(_ss.str()); // } // update the entityID2FileLineMap by these two nodes. this->updateEntityID2FileLineMap(oldNodePtr); this->updateEntityID2FileLineMap(newNodePtr); }
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node) { //debug // { // stringstream _ss; // _ss << "**********************split happen at " // << _p_node_being_split->getFileLine() << endl; // _ss << _p_node_being_split->to_str() << endl; // Util::logging(_ss.str()); // } // first, add the new child node(if not leaf) or child entry(if leaf) to the full node. bool just_insert_entry = (_p_insert_node == NULL); if (just_insert_entry) { _p_node_being_split->addChildEntry(_insert_entry, true); } else { _p_node_being_split->addChildNode(_p_insert_node, true); } SigEntry entryA, entryB; /* two seeds to generate two new nodes. * seedA kernel: the SigEntry with the minimal count of signature. * seedB kernel: the SigEntry with the second minimal count of signature. * */ int minCount = 0; // record the minimal signature count. int entryA_index = 0; // record the seedA kernel index. for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount(); if (minCount < currentCount) { minCount = currentCount; entryA_index = i; } } entryA = _p_node_being_split->getChildEntry(entryA_index); minCount = 0; int entryB_index = 0; // record the seedB kernel index. for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); if (i != entryA_index && minCount <= currentCount) { minCount = currentCount; entryB_index = i; } } entryB = _p_node_being_split->getChildEntry(entryB_index); // AEntryIndex: the entry index near seedA. // BEntryIndex: the entry index near seedB. std::vector<int> entryIndex_nearA, entryIndex_nearB; entryIndex_nearA.clear(); entryIndex_nearB.clear(); entryIndex_nearA.push_back(entryA_index); entryIndex_nearB.push_back(entryB_index); /* just tmp variables, for more readibility */ int nearA_max_size, nearB_max_size; bool nearA_tooSmall, nearB_tooSmall; for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; /* should guarantee that each new node has at least MIN_CHILD_NUM children. */ nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size(); nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM); if (nearA_tooSmall) { for (;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearA.push_back(i); } break; } nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size(); nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM); if (nearB_tooSmall) { for (;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearB.push_back(i); } break; } /* calculate the distance from * the i-th child entry signature to seedA(or seedB).*/ /*debug target 1*/ int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i)); // choose the near one seed to add into if (disToSeedA <= disToSeedB) { entryIndex_nearA.push_back(i); } else { entryIndex_nearB.push_back(i); } } // then create a new node to act as BEntryIndex's father. VNode* newNodePtr = this->createNode(); //debug // { // stringstream _ss; // _ss << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl; // Util::logging(_ss.str()); // } // the old one acts as AEntryIndex's father. VNode* oldNodePtr = _p_node_being_split; // if the old node is leaf, set the new node as a leaf. if (oldNodePtr->isLeaf()) { newNodePtr->setAsLeaf(true); } /* add all the entries in BEntryIndex into the new node child entry array, and calculate the new node's entry.*/ for (unsigned i=0;i<entryIndex_nearB.size();i++) { if (oldNodePtr->isLeaf()) { newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false); } else { /*debug target 2*/ VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer)); newNodePtr->addChildNode(childPtr); } } newNodePtr->refreshSignature(); /* label the child being removed with -1, * and update the old node's entry.*/ std::sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>()); //debug // { // stringstream _ss; // { // _ss << "nearA: "; // for(int i = 0; i < entryIndex_nearA.size(); i ++) // { // _ss << entryIndex_nearA[i] << " "; // } // _ss << endl; // // _ss << "nearB: "; // for(int i = 0; i < entryIndex_nearB.size(); i ++) // { // _ss << entryIndex_nearB[i] << " "; // } // _ss << endl; // } // Util::logging(_ss.str()); // } for (unsigned i=0;i<entryIndex_nearA.size();i++) { oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i])); oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i])); } oldNodePtr->setChildNum(entryIndex_nearA.size()); oldNodePtr->refreshSignature(); int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer)); // full node's father pointer. VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer)); if (oldNodePtr->isRoot()) { /* if the old node is root, * split the root, create a new root, * and the tree height will be increased.*/ VNode* RootNewPtr = this->createNode(); /* change the old root node to not-root node, * and set the RootNew to root node.*/ oldNodePtr->setAsRoot(false); RootNewPtr->setAsRoot(true); /* set the split two node(old node and new node) as the new root's child, * and update signatures.*/ RootNewPtr->addChildNode(oldNodePtr); RootNewPtr->addChildNode(newNodePtr); RootNewPtr->refreshSignature(); //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // Util::logging(_ss.str()); // } /* should keep the root node always being * at the first line(line zero) of the tree node file.*/ this->swapNodeFileLine(RootNewPtr, oldNodePtr); this->height ++; //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl; // _ss << RootNewPtr->to_str() << endl; // Util::logging(_ss.str()); // } } else { /* if the (OldNode) is not Root, * change the old node's signature to A's signature.*/ oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry()); if (oldNodeFatherPtr->isFull()) { oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr); } else { oldNodeFatherPtr->addChildNode(newNodePtr); oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); } } //debug // if (!oldNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl; // Util::logging(_ss.str()); // } // if (!newNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl; // Util::logging(_ss.str()); // } // update the entityID2FileLineMap by these two nodes. this->updateEntityID2FileLineMap(oldNodePtr); this->updateEntityID2FileLineMap(newNodePtr); }