std::string VSTree::to_str() { //debug { stringstream _ss; _ss << "after build tree, root is:" << endl; _ss << this->getRoot()->to_str() << endl; Util::logging(_ss.str()); } std::stringstream _ss; std::queue<int> nodeFileLineQueue; nodeFileLineQueue.push(this->getRoot()->getFileLine()); while(! nodeFileLineQueue.empty()) { int currentNodeFileLine = nodeFileLineQueue.front(); nodeFileLineQueue.pop(); VNode* currentNodePtr = this->getNode(currentNodeFileLine); _ss << currentNodePtr->to_str(); int childNum = currentNodePtr->getChildNum(); for(int i = 0; i < childNum; i ++) { if(! currentNodePtr->isLeaf()) { int childNodeFileLine = currentNodePtr->getChildFileLine(i); nodeFileLineQueue.push(childNodeFileLine); } } } return _ss.str(); }
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node) { #ifdef DEBUG_VSTREE stringstream _ss; _ss << "**********************split happen at " << _p_node_being_split->getFileLine() << endl; _ss << _p_node_being_split->to_str() << endl; Util::logging(_ss.str()); #endif // first, add the new child node(if not leaf) or child entry(if leaf) to the full node. bool just_insert_entry = (_p_insert_node == NULL); if(just_insert_entry) { _p_node_being_split->addChildEntry(_insert_entry, true); } else { _p_node_being_split->addChildNode(_p_insert_node, true); } SigEntry entryA, entryB; //BETTER: use hanming, xor result or the vector included angle to guess the distince. //And then also use the farest two as seeds. // //two seeds to generate two new nodes. //seedA kernel: the SigEntry with the minimal count of signature. //seedB kernel: the SigEntry with the maximal count of signature. int maxCount = 0; // record the minimal signature count. int entryA_index = 0; // record the seedA kernel index. for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount(); if(maxCount < currentCount) { maxCount = currentCount; entryA_index = i; } } entryA = _p_node_being_split->getChildEntry(entryA_index); maxCount = 0; int entryB_index = 0; // record the seedB kernel index. for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { //NOTICE:I think xOR should be used here to choose the farest two int currentCount = entryA.xOR(_p_node_being_split->getChildEntry(i)); //int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); if(i != entryA_index && maxCount <= currentCount) { maxCount = currentCount; entryB_index = i; } } entryB = _p_node_being_split->getChildEntry(entryB_index); // AEntryIndex: the entry index near seedA. // BEntryIndex: the entry index near seedB. std::vector<int> entryIndex_nearA, entryIndex_nearB; entryIndex_nearA.clear(); entryIndex_nearB.clear(); entryIndex_nearA.push_back(entryA_index); entryIndex_nearB.push_back(entryB_index); int nearA_max_size, nearB_max_size; bool nearA_tooSmall, nearB_tooSmall; for(int i = 0; i < VNode::MAX_CHILD_NUM; i++) { if(i == entryA_index || i == entryB_index) continue; //should guarantee that each new node has at least MIN_CHILD_NUM children. nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size(); nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM); if(nearA_tooSmall) { for(; i < VNode::MAX_CHILD_NUM; i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearA.push_back(i); } break; } nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size(); nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM); if(nearB_tooSmall) { for(; i < VNode::MAX_CHILD_NUM; i++) { if(i == entryA_index || i == entryB_index) continue; entryIndex_nearB.push_back(i); } break; } //calculate the distance from //the i-th child entry signature to seedA(or seedB). //NOTICE:we should expect that the candidate can be almost contained! //However, the precondition there are not too many 1s int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i)); // choose the near one seed to add into if(disToSeedA <= disToSeedB) { entryIndex_nearA.push_back(i); } else { entryIndex_nearB.push_back(i); } } // then create a new node to act as BEntryIndex's father. VNode* newNodePtr = this->createNode(); #ifdef DEBUG_VSTREE stringstream _ss2; _ss2 << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl; Util::logging(_ss2.str()); #endif // the old one acts as AEntryIndex's father. VNode* oldNodePtr = _p_node_being_split; // if the old node is leaf, set the new node as a leaf. if(oldNodePtr->isLeaf()) { newNodePtr->setAsLeaf(true); } //add all the entries in BEntryIndex into the new node child entry array, //and calculate the new node's entry. for(unsigned i = 0; i < entryIndex_nearB.size(); i++) { if(oldNodePtr->isLeaf()) { newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false); } else { //debug target 2 VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer)); newNodePtr->addChildNode(childPtr); } } newNodePtr->refreshSignature(); //label the child being removed with -1, //and update the old node's entry. sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>()); #ifdef DEBUG_VSTREE stringstream _ss1; { _ss1 << "nearA: "; for(unsigned i = 0; i < entryIndex_nearA.size(); i++) { _ss1 << entryIndex_nearA[i] << " "; } _ss1 << endl; _ss1 << "nearB: "; for(unsigned i = 0; i < entryIndex_nearB.size(); i++) { _ss1 << entryIndex_nearB[i] << " "; } _ss1 << endl; } Util::logging(_ss1.str()); #endif for(unsigned i = 0; i < entryIndex_nearA.size(); i++) { oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i])); oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i])); } oldNodePtr->setChildNum(entryIndex_nearA.size()); oldNodePtr->refreshSignature(); int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer)); // full node's father pointer. VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer)); if(oldNodePtr->isRoot()) { //if the old node is root, //split the root, create a new root, //and the tree height will be increased. VNode* RootNewPtr = this->createNode(); //change the old root node to not-root node, //and set the RootNew to root node. oldNodePtr->setAsRoot(false); RootNewPtr->setAsRoot(true); //set the split two node(old node and new node) as the new root's child, //and update signatures. RootNewPtr->addChildNode(oldNodePtr); RootNewPtr->addChildNode(newNodePtr); RootNewPtr->refreshSignature(); //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // Util::logging(_ss.str()); // } //should keep the root node always being //at the first line(line zero) of the tree node file. this->swapNodeFileLine(RootNewPtr, oldNodePtr); this->height++; //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl; // _ss << RootNewPtr->to_str() << endl; // Util::logging(_ss.str()); // } } else { //if the (OldNode) is not Root, //change the old node's signature to A's signature. oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry()); if(oldNodeFatherPtr->isFull()) { oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr); } else { oldNodeFatherPtr->addChildNode(newNodePtr); oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); } } //debug // if (!oldNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl; // Util::logging(_ss.str()); // } // if (!newNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl; // Util::logging(_ss.str()); // } // update the entityID2FileLineMap by these two nodes. this->updateEntityID2FileLineMap(oldNodePtr); this->updateEntityID2FileLineMap(newNodePtr); }
//retrieve the candidate entity ID which signature can cover the _entity_bit_set, and add them to the _p_id_list. void VSTree::retrieveEntity(const EntityBitSet& _entity_bit_set, IDList* _p_id_list) { Util::logging("IN retrieveEntity"); EntitySig filterSig(_entity_bit_set); #ifdef DEBUG_VSTREE cerr << "the filter signature: " << filterSig.to_str() << endl; #endif queue<int> nodeQueue; //searching node file line queue. //debug { stringstream _ss; _ss << "filterSig=" << Signature::BitSet2str(filterSig.entityBitSet) << endl; Util::logging(_ss.str()); } const SigEntry& root_entry = (this->getRoot())->getEntry(); Util::logging("Get Root Entry"); if(root_entry.cover(filterSig)) { nodeQueue.push(this->getRoot()->getFileLine()); Util::logging("root cover the filter_sig"); } else { Util::logging("warning: root is not cover the filter_sig"); } //debug // { // Util::logging(this->getRoot()->to_str()); // Util::logging("Before BFS"); // } //using BFS algorithm to traverse the VSTree and retrieve the entry. while (!nodeQueue.empty()) { int currentNodeFileLine = nodeQueue.front(); nodeQueue.pop(); VNode* currentNodePtr = this->getNode(currentNodeFileLine); int childNum = currentNodePtr->getChildNum(); //debug // { // std::stringstream _ss; // _ss << "childNum of [" // << currentNodePtr->getFileLine() // << "] is " << childNum << endl; // // for (int i=0;i<childNum;i++) // { // _ss << currentNodePtr->getChildFileLine(i) << " "; // } // _ss << endl; // // Util::logging(_ss.str()); // } int valid = 0; for (int i = 0; i < childNum; i++) { const SigEntry& entry = currentNodePtr->getChildEntry(i); #ifdef DEBUG_VSTREE //cerr << "current entry: " << entry.to_str() << endl; #endif if (entry.cover(filterSig)) { valid++; if (currentNodePtr->isLeaf()) { // if leaf node, add the satisfying entries' entity id to result list. _p_id_list->addID(entry.getEntityId()); //debug // { // stringstream _ss; // _ss << "child_" << i << " cover filter sig" << endl; // _ss << Signature::BitSet2str(entry.getEntitySig().entityBitSet)<< endl; // Util::logging(_ss.str()); // } } else { // if non-leaf node, add the child node pointer to the searching queue. //VNode* childPtr = currentNodePtr->getChild(i, *(this->node_buffer)); // if non-leaf node, add the child node file line to the searching queue. int childNodeFileLine = currentNodePtr->getChildFileLine(i); nodeQueue.push(childNodeFileLine); //debug // { // stringstream _ss; // _ss << "child[" << childPtr->getFileLine() << "] cover filter sig" << endl; // Util::logging(_ss.str()); // } } } } #ifdef DEBUG_VSTREE //cerr << "child num: " << childNum << " valid num: " << valid << endl; #endif } Util::logging("OUT retrieveEntity"); }
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node) { //debug // { // stringstream _ss; // _ss << "**********************split happen at " // << _p_node_being_split->getFileLine() << endl; // _ss << _p_node_being_split->to_str() << endl; // Util::logging(_ss.str()); // } // first, add the new child node(if not leaf) or child entry(if leaf) to the full node. bool just_insert_entry = (_p_insert_node == NULL); if (just_insert_entry) { _p_node_being_split->addChildEntry(_insert_entry, true); } else { _p_node_being_split->addChildNode(_p_insert_node, true); } SigEntry entryA, entryB; /* two seeds to generate two new nodes. * seedA kernel: the SigEntry with the minimal count of signature. * seedB kernel: the SigEntry with the second minimal count of signature. * */ int minCount = 0; // record the minimal signature count. int entryA_index = 0; // record the seedA kernel index. for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount(); if (minCount < currentCount) { minCount = currentCount; entryA_index = i; } } entryA = _p_node_being_split->getChildEntry(entryA_index); minCount = 0; int entryB_index = 0; // record the seedB kernel index. for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); if (i != entryA_index && minCount <= currentCount) { minCount = currentCount; entryB_index = i; } } entryB = _p_node_being_split->getChildEntry(entryB_index); // AEntryIndex: the entry index near seedA. // BEntryIndex: the entry index near seedB. std::vector<int> entryIndex_nearA, entryIndex_nearB; entryIndex_nearA.clear(); entryIndex_nearB.clear(); entryIndex_nearA.push_back(entryA_index); entryIndex_nearB.push_back(entryB_index); /* just tmp variables, for more readibility */ int nearA_max_size, nearB_max_size; bool nearA_tooSmall, nearB_tooSmall; for (int i=0;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; /* should guarantee that each new node has at least MIN_CHILD_NUM children. */ nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size(); nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM); if (nearA_tooSmall) { for (;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearA.push_back(i); } break; } nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size(); nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM); if (nearB_tooSmall) { for (;i<VNode::MAX_CHILD_NUM;i++) { if (i == entryA_index || i == entryB_index) continue; entryIndex_nearB.push_back(i); } break; } /* calculate the distance from * the i-th child entry signature to seedA(or seedB).*/ /*debug target 1*/ int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i)); int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i)); // choose the near one seed to add into if (disToSeedA <= disToSeedB) { entryIndex_nearA.push_back(i); } else { entryIndex_nearB.push_back(i); } } // then create a new node to act as BEntryIndex's father. VNode* newNodePtr = this->createNode(); //debug // { // stringstream _ss; // _ss << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl; // Util::logging(_ss.str()); // } // the old one acts as AEntryIndex's father. VNode* oldNodePtr = _p_node_being_split; // if the old node is leaf, set the new node as a leaf. if (oldNodePtr->isLeaf()) { newNodePtr->setAsLeaf(true); } /* add all the entries in BEntryIndex into the new node child entry array, and calculate the new node's entry.*/ for (unsigned i=0;i<entryIndex_nearB.size();i++) { if (oldNodePtr->isLeaf()) { newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false); } else { /*debug target 2*/ VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer)); newNodePtr->addChildNode(childPtr); } } newNodePtr->refreshSignature(); /* label the child being removed with -1, * and update the old node's entry.*/ std::sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>()); //debug // { // stringstream _ss; // { // _ss << "nearA: "; // for(int i = 0; i < entryIndex_nearA.size(); i ++) // { // _ss << entryIndex_nearA[i] << " "; // } // _ss << endl; // // _ss << "nearB: "; // for(int i = 0; i < entryIndex_nearB.size(); i ++) // { // _ss << entryIndex_nearB[i] << " "; // } // _ss << endl; // } // Util::logging(_ss.str()); // } for (unsigned i=0;i<entryIndex_nearA.size();i++) { oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i])); oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i])); } oldNodePtr->setChildNum(entryIndex_nearA.size()); oldNodePtr->refreshSignature(); int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer)); // full node's father pointer. VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer)); if (oldNodePtr->isRoot()) { /* if the old node is root, * split the root, create a new root, * and the tree height will be increased.*/ VNode* RootNewPtr = this->createNode(); /* change the old root node to not-root node, * and set the RootNew to root node.*/ oldNodePtr->setAsRoot(false); RootNewPtr->setAsRoot(true); /* set the split two node(old node and new node) as the new root's child, * and update signatures.*/ RootNewPtr->addChildNode(oldNodePtr); RootNewPtr->addChildNode(newNodePtr); RootNewPtr->refreshSignature(); //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // Util::logging(_ss.str()); // } /* should keep the root node always being * at the first line(line zero) of the tree node file.*/ this->swapNodeFileLine(RootNewPtr, oldNodePtr); this->height ++; //debug // { // stringstream _ss; // _ss << "create new root:" << endl; // _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl; // _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl; // _ss << RootNewPtr->to_str() << endl; // Util::logging(_ss.str()); // } } else { /* if the (OldNode) is not Root, * change the old node's signature to A's signature.*/ oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry()); if (oldNodeFatherPtr->isFull()) { oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr); } else { oldNodeFatherPtr->addChildNode(newNodePtr); oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer)); } } //debug // if (!oldNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl; // Util::logging(_ss.str()); // } // if (!newNodePtr->checkState()) // { // stringstream _ss; // _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl; // Util::logging(_ss.str()); // } // update the entityID2FileLineMap by these two nodes. this->updateEntityID2FileLineMap(oldNodePtr); this->updateEntityID2FileLineMap(newNodePtr); }