示例#1
0
//insert an new Entry, whose entity doesn't exist before 
bool 
VSTree::insertEntry(const SigEntry& _entry)
{

	//choose the best leaf node to insert the _entry 
    VNode* choosedNodePtr = this->chooseNode(this->getRoot(), _entry);

#ifdef DEBUG_VSTREE
		if (_entry.getEntityId() == 4000001)
		{
			stringstream _ss;
			if (choosedNodePtr)
			{
				_ss << "insert " << _entry.getEntityId()
					<< " into [" << choosedNodePtr->getFileLine() << "],\t";
				_ss << "whose childnum is " << choosedNodePtr->getChildNum() << endl;
			}
			else
			{
				_ss << "insert " << _entry.getEntityId() << " , can not choose a leaf node to insert entry. @VSTree::insert" << endl;
			}
			Util::logging(_ss.str());
		}
#endif

    if (choosedNodePtr == NULL)
    {
        cerr << "error, can not choose a leaf node to insert entry. @VSTree::insert" << endl;
        return false;
    }

    if (choosedNodePtr->isFull())
    {
		 //if the choosed leaf node to insert is full, the node should be split.
        this->split(choosedNodePtr, _entry, NULL);

        //debug
//        if (!choosedNodePtr->checkState())
//        {
//            stringstream _ss;
//            _ss << "node " << choosedNodePtr->getFileLine() << " childFileLine error. after split" << endl;
//            Util::logging(_ss.str());
//        }
    }
    else
    {
        choosedNodePtr->addChildEntry(_entry, false);
        choosedNodePtr->refreshAncestorSignature(*(this->node_buffer));

        //debug
//        if (!choosedNodePtr->checkState())
//        {
//            stringstream _ss;
//            _ss << "node " << choosedNodePtr->getFileLine() << " childFileLine error. after addChildEntry" << endl;
//            _ss <<"child num=" << choosedNodePtr->getChildNum() << endl;
//            _ss <<"node num=" << this->node_num << " entry num=" << this->entry_num << endl;
//            Util::logging(_ss.str());
//        }

        // update the entityID2FileLineMap.
        this->entityID2FileLineMap[_entry.getEntityId()] = choosedNodePtr->getFileLine();
    }
    this->entry_num ++;

    return true;
}
示例#2
0
// Split a full node while adding one more child to it.
//   _p_node_being_split : the node to split. It is modified in place and keeps
//                         the children grouped around seed A.
//   _insert_entry       : the entry to add when _p_insert_node is NULL.
//   _p_insert_node      : the child node to add instead of a bare entry;
//                         NULL means only _insert_entry is inserted.
// A newly created node receives the children grouped around seed B. If the
// split node is the root, a new root is created above both halves and the
// height is incremented. Otherwise the new node is attached to the father,
// which is split recursively when it is itself full.
void 
VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node)
{
#ifdef DEBUG_VSTREE
		stringstream _ss;
		_ss << "**********************split happen at "
			<< _p_node_being_split->getFileLine() << endl;
		_ss << _p_node_being_split->to_str() << endl;
		Util::logging(_ss.str());
#endif
    // First, add the new child node (when one is supplied) or the child entry to the full node.
    // NOTE(review): the meaning of the second argument `true` is defined in VNode — confirm there.
    // The loops below scan indices [0, MAX_CHILD_NUM), so the node is assumed to hold exactly
    // MAX_CHILD_NUM children after this insertion — TODO confirm against VNode::isFull().
	bool just_insert_entry = (_p_insert_node == NULL);
    if(just_insert_entry)
    {
        _p_node_being_split->addChildEntry(_insert_entry, true);
    }
    else
    {
        _p_node_being_split->addChildNode(_p_insert_node, true);
    }

    SigEntry entryA, entryB;

	//BETTER: use the Hamming distance, the XOR result or the included angle between
	//the vectors to estimate the distance, and then use the two farthest entries as seeds.
	//
	//Two seeds are used to generate the two resulting nodes:
	//seedA kernel: the child entry with the maximal getSigCount() value.
	//seedB kernel: the child entry, other than seedA, with the largest xOR value against seedA.


    int maxCount = 0; // largest signature count seen so far.
    int entryA_index = 0; // record the seedA kernel index.
    for(int i = 0; i < VNode::MAX_CHILD_NUM; i++)
    {
        int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount();
        // strict '<' keeps the first index among equal counts
        if(maxCount < currentCount)
        {
            maxCount = currentCount;
            entryA_index = i;
        }
    }
    entryA = _p_node_being_split->getChildEntry(entryA_index);

	// reuse maxCount, now as the largest xOR value against seedA seen so far
	maxCount = 0;
    int entryB_index = 0; // record the seedB kernel index.
    for(int i = 0; i < VNode::MAX_CHILD_NUM; i++)
    {
		//NOTICE: xOR is used here to choose the entry farthest from seedA
		int currentCount = entryA.xOR(_p_node_being_split->getChildEntry(i));
		//int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
        // '<=' keeps the last index among equal values; seedA itself is never chosen
        if(i != entryA_index && maxCount <= currentCount)
        {
            maxCount = currentCount;
            entryB_index = i;
        }
    }
    entryB = _p_node_being_split->getChildEntry(entryB_index);

    // entryIndex_nearA: child indices assigned to seedA (they stay in the old node).
    // entryIndex_nearB: child indices assigned to seedB (they move to the new node).
    // Each list starts with its own seed.
    std::vector<int> entryIndex_nearA, entryIndex_nearB;
    entryIndex_nearA.clear();
    entryIndex_nearB.clear();
    entryIndex_nearA.push_back(entryA_index);
    entryIndex_nearB.push_back(entryB_index);

    // scratch values, recomputed on every iteration of the loop below
    int nearA_max_size, nearB_max_size;
    bool nearA_tooSmall, nearB_tooSmall;

    for(int i = 0; i < VNode::MAX_CHILD_NUM; i++)
    {
        // the seeds are already placed
        if(i == entryA_index || i == entryB_index) continue;

		//should guarantee that each new node has at least MIN_CHILD_NUM children.
		//nearA_max_size is the size nearA would reach if every unassigned child went to it.
        nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size();
        nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM);

        if(nearA_tooSmall)
        {
            // nearA needs every remaining child: assign them all and stop distributing
            for(; i < VNode::MAX_CHILD_NUM; i++)
            {
                if (i == entryA_index || i == entryB_index) continue;
                entryIndex_nearA.push_back(i);
            }
            break;
        }

        // same check from seedB's point of view
        nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size();
        nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM);
        if(nearB_tooSmall)
        {
            // nearB needs every remaining child: assign them all and stop distributing
            for(; i < VNode::MAX_CHILD_NUM; i++)
            {
                if(i == entryA_index || i == entryB_index) continue;
                entryIndex_nearB.push_back(i);
            }
            break;
        }

         //calculate the distance from
         //the i-th child entry signature to seedA(or seedB).

		//NOTICE:we should expect that the candidate can be almost contained!
		//However, the precondition is that there are not too many 1s.
		//NOTE(review): xEpsilen is treated here as a distance-like value — confirm in SigEntry.
        int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
        int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i));
        // choose the nearer seed; ties go to seedA
        if(disToSeedA <= disToSeedB)
        {
			 entryIndex_nearA.push_back(i);
        }
        else
        {
			 entryIndex_nearB.push_back(i);
        }
    }

    // then create a new node to act as the father of the entryIndex_nearB children.
    VNode* newNodePtr = this->createNode();

#ifdef DEBUG_VSTREE
		stringstream _ss2;
		_ss2 << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl;
		Util::logging(_ss2.str());
#endif
    // the old one acts as the father of the entryIndex_nearA children.
    VNode* oldNodePtr = _p_node_being_split;

    // if the old node is leaf, set the new node as a leaf.
    if(oldNodePtr->isLeaf())
    {
        newNodePtr->setAsLeaf(true);
    }

	//add everything listed in entryIndex_nearB to the new node (entries for a leaf,
	//child nodes otherwise), and then recalculate the new node's signature.
    for(unsigned i = 0; i < entryIndex_nearB.size(); i++)
    {
        if(oldNodePtr->isLeaf())
        {
            newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false);
        }
        else
        {
			 //debug target 2
			 //the child node is fetched through the node buffer before being re-attached
        	VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer));
            newNodePtr->addChildNode(childPtr);
        }
    }
    newNodePtr->refreshSignature();

     //Compact the children kept by the old node into slots [0, entryIndex_nearA.size()).
     //Sorting ascending makes entryIndex_nearA[i] >= i, so the copy loop below never
     //overwrites a slot before it has been read.
    sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>());

#ifdef DEBUG_VSTREE
    	stringstream _ss1;
    	{
    		_ss1 << "nearA: ";
    		for(unsigned i = 0; i < entryIndex_nearA.size(); i++)
    		{
    			_ss1 << entryIndex_nearA[i] << " ";
    		}
    		_ss1 << endl;

    		_ss1 << "nearB: ";
    		for(unsigned i = 0; i < entryIndex_nearB.size(); i++)
    		{
    			_ss1 << entryIndex_nearB[i] << " ";
    		}
    		_ss1 << endl;
    	}
    	Util::logging(_ss1.str());
#endif

    // move each kept child's entry and file line down to slot i
    for(unsigned i = 0; i < entryIndex_nearA.size(); i++)
    {
        oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i]));
        oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i]));
    }
    // shrink the old node to the kept children and recompute its signature
    oldNodePtr->setChildNum(entryIndex_nearA.size());
    oldNodePtr->refreshSignature();

    // Both lookups happen before the root check; their results are only used in the non-root branch.
    int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer));
    // full node's father pointer.
    VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer));
    if(oldNodePtr->isRoot())
    {
         //if the old node is root,
		 //split the root, create a new root,
         //and the tree height will be increased.
        VNode* RootNewPtr = this->createNode();

         //change the old root node to not-root node,
         //and set the RootNew to root node.
        oldNodePtr->setAsRoot(false);
        RootNewPtr->setAsRoot(true);

         //set the split two node(old node and new node) as the new root's child,
         //and update signatures.
        RootNewPtr->addChildNode(oldNodePtr);
        RootNewPtr->addChildNode(newNodePtr);
        RootNewPtr->refreshSignature();

        //debug
//        {
//            stringstream _ss;
//            _ss << "create new root:" << endl;
//            _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl;
//            Util::logging(_ss.str());
//        }

         //should keep the root node always being
         //at the first line(line zero) of the tree node file.
        this->swapNodeFileLine(RootNewPtr, oldNodePtr);
        this->height++;

        //debug
//        {
//            stringstream _ss;
//            _ss << "create new root:" << endl;
//            _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl;
//            _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl;
//            _ss << RootNewPtr->to_str() << endl;
//            Util::logging(_ss.str());
//        }
    }
    else
    {
         //if the old node is not the root,
         //refresh the entry the father stores for the old node, whose signature was just recomputed.
    	oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry());


        if(oldNodeFatherPtr->isFull())
        {
            // the father has no room for the new node: propagate the signature change,
            // then split the father while inserting the new node into it.
        	oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer));
            this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr);
        }
        else
        {
            // the father has room: attach the new node and propagate the signature change.
        	oldNodeFatherPtr->addChildNode(newNodePtr);
        	oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer));
        }
    }

    //debug
//    if (!oldNodePtr->checkState())
//    {
//        stringstream _ss;
//        _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl;
//        Util::logging(_ss.str());
//    }
//    if (!newNodePtr->checkState())
//    {
//        stringstream _ss;
//        _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl;
//        Util::logging(_ss.str());
//    }

    // update the entityID2FileLineMap by these two nodes.
    // This runs after any root file-line swap or recursive father split above.
    this->updateEntityID2FileLineMap(oldNodePtr);
    this->updateEntityID2FileLineMap(newNodePtr);
}
示例#3
0
// Split a full node while adding one more child to it.
//   _p_node_being_split : the node to split. It is modified in place and keeps
//                         the children grouped around seed A.
//   _insert_entry       : the entry to add when _p_insert_node is NULL.
//   _p_insert_node      : the child node to add instead of a bare entry;
//                         NULL means only _insert_entry is inserted.
// A newly created node receives the children grouped around seed B. If the
// split node is the root, a new root is created above both halves and the
// height is incremented. Otherwise the new node is attached to the father,
// which is split recursively when it is itself full.
void VSTree::split(VNode* _p_node_being_split, const SigEntry& _insert_entry, VNode* _p_insert_node)
{
    //debug
//	{
//		stringstream _ss;
//		_ss << "**********************split happen at "
//			<< _p_node_being_split->getFileLine() << endl;
//		_ss << _p_node_being_split->to_str() << endl;
//		Util::logging(_ss.str());
//	}
    // First, add the new child node (when one is supplied) or the child entry to the full node.
    // NOTE(review): the meaning of the second argument `true` is defined in VNode — confirm there.
    // The loops below scan indices [0, MAX_CHILD_NUM), so the node is assumed to hold exactly
    // MAX_CHILD_NUM children after this insertion — TODO confirm against VNode::isFull().
	bool just_insert_entry = (_p_insert_node == NULL);
    if (just_insert_entry)
    {
        _p_node_being_split->addChildEntry(_insert_entry, true);
    }
    else
    {
        _p_node_being_split->addChildNode(_p_insert_node, true);
    }

    SigEntry entryA, entryB;
    /* two seeds to generate two new nodes.
     * seedA kernel: the child entry with the maximal getSigCount() value.
     * seedB kernel: the child entry, other than seedA, with the largest
     *               xEpsilen value against seedA.
     * */

    int minCount = 0; // despite its name, this tracks the largest signature count seen so far.
    int entryA_index = 0; // record the seedA kernel index.
    for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
    {
        int currentCount = (int) _p_node_being_split->getChildEntry(i).getSigCount();
        // strict '<' keeps the first index among equal counts
        if (minCount < currentCount)
        {
            minCount = currentCount;
            entryA_index = i;
        }
    }
    entryA = _p_node_being_split->getChildEntry(entryA_index);

    // reuse minCount, now as the largest xEpsilen value against seedA seen so far
    minCount = 0;
    int entryB_index = 0; // record the seedB kernel index.
    for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
    {
        // NOTE(review): xEpsilen is treated here as a distance-like value — confirm in SigEntry.
        int currentCount = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
        // '<=' keeps the last index among equal values; seedA itself is never chosen
        if (i != entryA_index && minCount <= currentCount)
        {
            minCount = currentCount;
            entryB_index = i;
        }
    }
    entryB = _p_node_being_split->getChildEntry(entryB_index);

    // entryIndex_nearA: child indices assigned to seedA (they stay in the old node).
    // entryIndex_nearB: child indices assigned to seedB (they move to the new node).
    // Each list starts with its own seed.
    std::vector<int> entryIndex_nearA, entryIndex_nearB;
    entryIndex_nearA.clear();
    entryIndex_nearB.clear();
    entryIndex_nearA.push_back(entryA_index);
    entryIndex_nearB.push_back(entryB_index);

    /* just temporary variables, for better readability; recomputed on every iteration below */
    int nearA_max_size, nearB_max_size;
    bool nearA_tooSmall, nearB_tooSmall;

    for (int i=0;i<VNode::MAX_CHILD_NUM;i++)
    {
        // the seeds are already placed
        if (i == entryA_index || i == entryB_index) continue;

        /* should guarantee that each new node has at least MIN_CHILD_NUM children.
         * nearA_max_size is the size nearA would reach if every unassigned child went to it. */
        nearA_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearB.size();
        nearA_tooSmall = (nearA_max_size <= VNode::MIN_CHILD_NUM);

        if (nearA_tooSmall)
        {
            // nearA needs every remaining child: assign them all and stop distributing
            for (;i<VNode::MAX_CHILD_NUM;i++)
            {
                if (i == entryA_index || i == entryB_index) continue;
                entryIndex_nearA.push_back(i);
            }
            break;
        }

        // same check from seedB's point of view
        nearB_max_size = VNode::MAX_CHILD_NUM - entryIndex_nearA.size();
        nearB_tooSmall = (nearB_max_size <= VNode::MIN_CHILD_NUM);
        if (nearB_tooSmall)
        {
            // nearB needs every remaining child: assign them all and stop distributing
            for (;i<VNode::MAX_CHILD_NUM;i++)
            {
                if (i == entryA_index || i == entryB_index) continue;
                entryIndex_nearB.push_back(i);
            }
            break;
        }

        /* calculate the distance from
         * the i-th child entry signature to seedA(or seedB).*/

        /*debug target 1*/
        int disToSeedA = entryA.xEpsilen(_p_node_being_split->getChildEntry(i));
        int disToSeedB = entryB.xEpsilen(_p_node_being_split->getChildEntry(i));
        // choose the nearer seed; ties go to seedA
        if (disToSeedA <= disToSeedB)
        {
        	 entryIndex_nearA.push_back(i);
        }
        else
        {
        	 entryIndex_nearB.push_back(i);
        }
    }

    // then create a new node to act as the father of the entryIndex_nearB children.
    VNode* newNodePtr = this->createNode();

    //debug
//    {
//    	stringstream _ss;
//    	_ss << "new Node is :[" << newNodePtr->getFileLine() << "]" << endl;
//    	Util::logging(_ss.str());
//    }
    // the old one acts as the father of the entryIndex_nearA children.
    VNode* oldNodePtr = _p_node_being_split;

    // if the old node is leaf, set the new node as a leaf.
    if (oldNodePtr->isLeaf())
    {
        newNodePtr->setAsLeaf(true);
    }

    /* add everything listed in entryIndex_nearB to the new node (entries for a leaf,
    child nodes otherwise), and then recalculate the new node's signature.*/
    for (unsigned i=0;i<entryIndex_nearB.size();i++)
    {
        if (oldNodePtr->isLeaf())
        {
            newNodePtr->addChildEntry(oldNodePtr->getChildEntry(entryIndex_nearB[i]), false);
        }
        else
        {
        	 /*debug target 2*/
        	 /* the child node is fetched through the node buffer before being re-attached */
        	VNode* childPtr = oldNodePtr->getChild(entryIndex_nearB[i], *(this->node_buffer));
            newNodePtr->addChildNode(childPtr);
        }
    }
    newNodePtr->refreshSignature();

    /* Compact the children kept by the old node into slots [0, entryIndex_nearA.size()).
     * Sorting ascending makes entryIndex_nearA[i] >= i, so the copy loop below never
     * overwrites a slot before it has been read.*/
    std::sort(entryIndex_nearA.begin(), entryIndex_nearA.end(), less<int>());

    //debug
//    {
//    	stringstream _ss;
//    	{
//    		_ss << "nearA: ";
//    		for(int i = 0; i < entryIndex_nearA.size(); i ++)
//    		{
//    			_ss << entryIndex_nearA[i] << " ";
//    		}
//    		_ss << endl;
//
//    		_ss << "nearB: ";
//    		for(int i = 0; i < entryIndex_nearB.size(); i ++)
//    		{
//    			_ss << entryIndex_nearB[i] << " ";
//    		}
//    		_ss << endl;
//    	}
//    	Util::logging(_ss.str());
//    }

    // move each kept child's entry and file line down to slot i
    for (unsigned i=0;i<entryIndex_nearA.size();i++)
    {
        oldNodePtr->setChildEntry(i, oldNodePtr->getChildEntry(entryIndex_nearA[i]));
        oldNodePtr->setChildFileLine(i, oldNodePtr->getChildFileLine(entryIndex_nearA[i]));
    }
    // shrink the old node to the kept children and recompute its signature
    oldNodePtr->setChildNum(entryIndex_nearA.size());
    oldNodePtr->refreshSignature();

    // Both lookups happen before the root check; their results are only used in the non-root branch.
    int oldNode_index = oldNodePtr->getIndexInFatherNode(*(this->node_buffer));
    // full node's father pointer.
    VNode* oldNodeFatherPtr = oldNodePtr->getFather(*(this->node_buffer));
    if (oldNodePtr->isRoot())
    {
        /* if the old node is root,
         * split the root, create a new root,
         * and the tree height will be increased.*/
        VNode* RootNewPtr = this->createNode();

        /* change the old root node to not-root node,
         * and set the RootNew to root node.*/
        oldNodePtr->setAsRoot(false);
        RootNewPtr->setAsRoot(true);

        /* set the split two node(old node and new node) as the new root's child,
         * and update signatures.*/
        RootNewPtr->addChildNode(oldNodePtr);
        RootNewPtr->addChildNode(newNodePtr);
        RootNewPtr->refreshSignature();

        //debug
//        {
//            stringstream _ss;
//            _ss << "create new root:" << endl;
//            _ss << "before swap file line, two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl;
//            Util::logging(_ss.str());
//        }

        /* should keep the root node always being
         * at the first line(line zero) of the tree node file.*/
        this->swapNodeFileLine(RootNewPtr, oldNodePtr);
        this->height ++;

        //debug
//        {
//            stringstream _ss;
//            _ss << "create new root:" << endl;
//            _ss << "two sons are: " << oldNodePtr->getFileLine() << " " << newNodePtr->getFileLine() << endl;
//            _ss << Signature::BitSet2str(oldNodePtr->getEntry().getEntitySig().entityBitSet) << endl;
//            _ss << RootNewPtr->to_str() << endl;
//            Util::logging(_ss.str());
//        }
    }
    else
    {
        /* if the old node is not the root,
         * refresh the entry the father stores for the old node,
         * whose signature was just recomputed.*/
    	oldNodeFatherPtr->setChildEntry(oldNode_index, oldNodePtr->getEntry());


        if (oldNodeFatherPtr->isFull())
        {
            // the father has no room for the new node: propagate the signature change,
            // then split the father while inserting the new node into it.
        	oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer));
            this->split(oldNodeFatherPtr, newNodePtr->getEntry(), newNodePtr);
        }
        else
        {
            // the father has room: attach the new node and propagate the signature change.
        	oldNodeFatherPtr->addChildNode(newNodePtr);
        	oldNodeFatherPtr->refreshAncestorSignature(*(this->node_buffer));
        }
    }

    //debug
//    if (!oldNodePtr->checkState())
//    {
//        stringstream _ss;
//        _ss << "node " << oldNodePtr->getFileLine() << " childFileLine error. oldNode when split" << endl;
//        Util::logging(_ss.str());
//    }
//    if (!newNodePtr->checkState())
//    {
//        stringstream _ss;
//        _ss << "node " << newNodePtr->getFileLine() << " childFileLine error. newNode when split" << endl;
//        Util::logging(_ss.str());
//    }

    // update the entityID2FileLineMap by these two nodes.
    // This runs after any root file-line swap or recursive father split above.
    this->updateEntityID2FileLineMap(oldNodePtr);
    this->updateEntityID2FileLineMap(newNodePtr);
}