//insert an new Entry, whose entity doesn't exist before bool VSTree::insertEntry(const SigEntry& _entry) { //choose the best leaf node to insert the _entry VNode* choosedNodePtr = this->chooseNode(this->getRoot(), _entry); #ifdef DEBUG_VSTREE if (_entry.getEntityId() == 4000001) { stringstream _ss; if (choosedNodePtr) { _ss << "insert " << _entry.getEntityId() << " into [" << choosedNodePtr->getFileLine() << "],\t"; _ss << "whose childnum is " << choosedNodePtr->getChildNum() << endl; } else { _ss << "insert " << _entry.getEntityId() << " , can not choose a leaf node to insert entry. @VSTree::insert" << endl; } Util::logging(_ss.str()); } #endif if (choosedNodePtr == NULL) { cerr << "error, can not choose a leaf node to insert entry. @VSTree::insert" << endl; return false; } if (choosedNodePtr->isFull()) { //if the choosed leaf node to insert is full, the node should be split. this->split(choosedNodePtr, _entry, NULL); //debug // if (!choosedNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << choosedNodePtr->getFileLine() << " childFileLine error. after split" << endl; // Util::logging(_ss.str()); // } } else { choosedNodePtr->addChildEntry(_entry, false); choosedNodePtr->refreshAncestorSignature(*(this->node_buffer)); //debug // if (!choosedNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << choosedNodePtr->getFileLine() << " childFileLine error. after addChildEntry" << endl; // _ss <<"child num=" << choosedNodePtr->getChildNum() << endl; // _ss <<"node num=" << this->node_num << " entry num=" << this->entry_num << endl; // Util::logging(_ss.str()); // } // update the entityID2FileLineMap. this->entityID2FileLineMap[_entry.getEntityId()] = choosedNodePtr->getFileLine(); } this->entry_num ++; return true; }
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node) { #ifdef DEBUG_VSTREE stringstream _ss; _ss << "**********************split happen at " << _p_node_being_split->getFileLine() << endl; _ss << _p_node_being_split->to_str() << endl; Util::logging(_ss.str()); #endif // first, add the new child node(if not leaf) or child entry(if leaf) to the full node. bool just_insert_entry = (_p_insert_node == NULL); if(just_insert_entry) { _p_node_being_split->addChildEntry(_insert_entry, true); } else { _p_node_being_split->addChildNode(_p_insert_node, true); } SigEntry entryA, entryB; //BETTER: use hanming, xor result or the vector included angle to guess the distince. //And then also use the farest two as seeds. // //two seeds to generate two new nodes. //seedA kernel: the SigEntry with the minimal count of signature. //seedB kernel: the SigEntry with the maximal count of signature. int maxCount = 0; // record the minimal signature count. int entryA_index = 0; // record the seedA kernel index. for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount(); if(maxCount < currentCount) { maxCount = currentCount; entryA_index = i; } } entryA = _p_node_being_split->getChildEntry(entryA_index); maxCount = 0; int entryB_index = 0; // record the seedB kernel index. for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { //NOTICE:I think xOR should be used here to choose the farest two int currentCount = entryA.xOR(_p_node_being_split->getChildEntry(i)); //int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); if(i != entryA_index && maxCount <= currentCount) { maxCount = currentCount; entryB_index = i; } } entryB = _p_node_being_split->getChildEntry(entryB_index); // AEntryIndex: the entry index near seedA. // BEntryIndex: the entry index near seedB. std::vector<int> entryIndex_nearA, entryIndex_nearB; entryIndex_nearA.clear(); entryIndex_nearB.clear(); entryIndex_nearA.push_back(entryA_index); entryIndex_nearB.push_back(entryB_index); int nearA_max_size, nearB_max_size; bool nearA_tooSmall, nearB_tooSmall; for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { if(i == entryA_index || i == entryB_index) continue; //should guarantee that each new node has at least MIN_CHILD_NUM children. nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size(); nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM); if(nearA_tooSmall) { for(; i < VNode::MAX_CHILD_NUM; i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearA.push_back(i); } break; } nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size(); nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM); if(nearB_tooSmall) { for(; i < VNode::MAX_CHILD_NUM; i++) { if(i == entryA_index || i == entryB_index) continue; entryIndex_nearB.push_back(i); } break; } //calculate the distance from //the i-th child entry signature to seedA(or seedB). //NOTICE:we should expect that the candidate can be almost contained! //However, the precondition there are not too many 1s int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i)); // choose the near one seed to add into if(disToSeedA <= disToSeedB) { entryIndex_nearA.push_back(i); } else { entryIndex_nearB.push_back(i); } } // then create a new node to act as BEntryIndex's father. VNode* newNodePtr = this->createNode(); #ifdef DEBUG_VSTREE stringstream _ss2; _ss2 << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl; Util::logging(_ss2.str()); #endif // the old one acts as AEntryIndex's father. VNode* oldNodePtr = _p_node_being_split; // if the old node is leaf, set the new node as a leaf. if(oldNodePtr->isLeaf()) { newNodePtr->setAsLeaf(true); } //add all the entries in BEntryIndex into the new node child entry array, //and calculate the new node's entry. for(unsigned i = 0; i < entryIndex_nearB.size(); i++) { if(oldNodePtr->isLeaf()) { newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false); } else { //debug target 2 VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer)); newNodePtr->addChildNode(childPtr); } } newNodePtr->refreshSignature(); //label the child being removed with -1, //and update the old node's entry. sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>()); #ifdef DEBUG_VSTREE stringstream _ss1; { _ss1 << "nearA: "; for(unsigned i = 0; i < entryIndex_nearA.size(); i++) { _ss1 << entryIndex_nearA[i] << " "; } _ss1 << endl; _ss1 << "nearB: "; for(unsigned i = 0; i < entryIndex_nearB.size(); i++) { _ss1 << entryIndex_nearB[i] << " "; } _ss1 << endl; } Util::logging(_ss1.str()); #endif for(unsigned i = 0; i < entryIndex_nearA.size(); i++) { oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i])); oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i])); } oldNodePtr->setChildNum(entryIndex_nearA.size()); oldNodePtr->refreshSignature(); int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer)); // full node's father pointer. VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer)); if(oldNodePtr->isRoot()) { //if the old node is root, //split the root, create a new root, //and the tree height will be increased. VNode* RootNewPtr = this->createNode(); //change the old root node to not-root node, //and set the RootNew to root node. oldNodePtr->setAsRoot(false); RootNewPtr->setAsRoot(true); //set the split two node(old node and new node) as the new root's child, //and update signatures. RootNewPtr->addChildNode(oldNodePtr); RootNewPtr->addChildNode(newNodePtr); RootNewPtr->refreshSignature(); //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // Util::logging(_ss.str()); // } //should keep the root node always being //at the first line(line zero) of the tree node file. this->swapNodeFileLine(RootNewPtr, oldNodePtr); this->height++; //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl; // _ss << RootNewPtr->to_str() << endl; // Util::logging(_ss.str()); // } } else { //if the (OldNode) is not Root, //change the old node's signature to A's signature. oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry()); if(oldNodeFatherPtr->isFull()) { oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr); } else { oldNodeFatherPtr->addChildNode(newNodePtr); oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); } } //debug // if (!oldNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl; // Util::logging(_ss.str()); // } // if (!newNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl; // Util::logging(_ss.str()); // } // update the entityID2FileLineMap by these two nodes. this->updateEntityID2FileLineMap(oldNodePtr); this->updateEntityID2FileLineMap(newNodePtr); }
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node) { //debug // { // stringstream _ss; // _ss << "**********************split happen at " // << _p_node_being_split->getFileLine() << endl; // _ss << _p_node_being_split->to_str() << endl; // Util::logging(_ss.str()); // } // first, add the new child node(if not leaf) or child entry(if leaf) to the full node. bool just_insert_entry = (_p_insert_node == NULL); if (just_insert_entry) { _p_node_being_split->addChildEntry(_insert_entry, true); } else { _p_node_being_split->addChildNode(_p_insert_node, true); } SigEntry entryA, entryB; /* two seeds to generate two new nodes. * seedA kernel: the SigEntry with the minimal count of signature. * seedB kernel: the SigEntry with the second minimal count of signature. * */ int minCount = 0; // record the minimal signature count. int entryA_index = 0; // record the seedA kernel index. for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount(); if (minCount < currentCount) { minCount = currentCount; entryA_index = i; } } entryA = _p_node_being_split->getChildEntry(entryA_index); minCount = 0; int entryB_index = 0; // record the seedB kernel index. for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); if (i != entryA_index && minCount <= currentCount) { minCount = currentCount; entryB_index = i; } } entryB = _p_node_being_split->getChildEntry(entryB_index); // AEntryIndex: the entry index near seedA. // BEntryIndex: the entry index near seedB. std::vector<int> entryIndex_nearA, entryIndex_nearB; entryIndex_nearA.clear(); entryIndex_nearB.clear(); entryIndex_nearA.push_back(entryA_index); entryIndex_nearB.push_back(entryB_index); /* just tmp variables, for more readibility */ int nearA_max_size, nearB_max_size; bool nearA_tooSmall, nearB_tooSmall; for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; /* should guarantee that each new node has at least MIN_CHILD_NUM children. */ nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size(); nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM); if (nearA_tooSmall) { for (;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearA.push_back(i); } break; } nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size(); nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM); if (nearB_tooSmall) { for (;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearB.push_back(i); } break; } /* calculate the distance from * the i-th child entry signature to seedA(or seedB).*/ /*debug target 1*/ int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i)); // choose the near one seed to add into if (disToSeedA <= disToSeedB) { entryIndex_nearA.push_back(i); } else { entryIndex_nearB.push_back(i); } } // then create a new node to act as BEntryIndex's father. VNode* newNodePtr = this->createNode(); //debug // { // stringstream _ss; // _ss << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl; // Util::logging(_ss.str()); // } // the old one acts as AEntryIndex's father. VNode* oldNodePtr = _p_node_being_split; // if the old node is leaf, set the new node as a leaf. if (oldNodePtr->isLeaf()) { newNodePtr->setAsLeaf(true); } /* add all the entries in BEntryIndex into the new node child entry array, and calculate the new node's entry.*/ for (unsigned i=0;i<entryIndex_nearB.size();i++) { if (oldNodePtr->isLeaf()) { newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false); } else { /*debug target 2*/ VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer)); newNodePtr->addChildNode(childPtr); } } newNodePtr->refreshSignature(); /* label the child being removed with -1, * and update the old node's entry.*/ std::sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>()); //debug // { // stringstream _ss; // { // _ss << "nearA: "; // for(int i = 0; i < entryIndex_nearA.size(); i ++) // { // _ss << entryIndex_nearA[i] << " "; // } // _ss << endl; // // _ss << "nearB: "; // for(int i = 0; i < entryIndex_nearB.size(); i ++) // { // _ss << entryIndex_nearB[i] << " "; // } // _ss << endl; // } // Util::logging(_ss.str()); // } for (unsigned i=0;i<entryIndex_nearA.size();i++) { oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i])); oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i])); } oldNodePtr->setChildNum(entryIndex_nearA.size()); oldNodePtr->refreshSignature(); int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer)); // full node's father pointer. VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer)); if (oldNodePtr->isRoot()) { /* if the old node is root, * split the root, create a new root, * and the tree height will be increased.*/ VNode* RootNewPtr = this->createNode(); /* change the old root node to not-root node, * and set the RootNew to root node.*/ oldNodePtr->setAsRoot(false); RootNewPtr->setAsRoot(true); /* set the split two node(old node and new node) as the new root's child, * and update signatures.*/ RootNewPtr->addChildNode(oldNodePtr); RootNewPtr->addChildNode(newNodePtr); RootNewPtr->refreshSignature(); //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // Util::logging(_ss.str()); // } /* should keep the root node always being * at the first line(line zero) of the tree node file.*/ this->swapNodeFileLine(RootNewPtr, oldNodePtr); this->height ++; //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl; // _ss << RootNewPtr->to_str() << endl; // Util::logging(_ss.str()); // } } else { /* if the (OldNode) is not Root, * change the old node's signature to A's signature.*/ oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry()); if (oldNodeFatherPtr->isFull()) { oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr); } else { oldNodeFatherPtr->addChildNode(newNodePtr); oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); } } //debug // if (!oldNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl; // Util::logging(_ss.str()); // } // if (!newNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl; // Util::logging(_ss.str()); // } // update the entityID2FileLineMap by these two nodes. this->updateEntityID2FileLineMap(oldNodePtr); this->updateEntityID2FileLineMap(newNodePtr); }