/**
 * Construct a tree node over `count` points of `data` starting at column
 * `begin`, recording both index mappings.
 *
 * @param data Dataset; passed on to SplitNode() together with oldFromNew.
 * @param begin Index of the first point held by this node.
 * @param count Number of points held by this node.
 * @param oldFromNew Mapping handed to SplitNode(); must already contain one
 *     entry per column of `data`.
 * @param newFromOld Resized to data.n_cols and filled as the inverse of
 *     oldFromNew once the split has finished.
 * @param parent Parent node stored in this node.
 * @param maxLeafSize Maximum leaf size stored in this node.
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    std::vector<size_t>& oldFromNew,
    std::vector<size_t>& newFromOld,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    maxLeafSize(maxLeafSize),
    bound(data.n_rows),
    dataset(data)
{
  // Hopefully the vector is initialized correctly!  We can't check that
  // entirely but we can do a minor sanity check: the inversion loop below
  // reads oldFromNew[i] for every column, so the vector must cover them all.
  assert(oldFromNew.size() == data.n_cols);

  // Perform the actual splitting.
  SplitNode(data, oldFromNew);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);

  // Map the newFromOld indices correctly (inverse of oldFromNew).
  newFromOld.resize(data.n_cols);
  for (size_t i = 0; i < data.n_cols; i++)
    newFromOld[oldFromNew[i]] = i;
}
// Merges one call chain, weighted by `period`, into the tree of call chain
// nodes hanging off this root.
//
// Walks down the tree matching callchain entries against existing nodes.
// A node that is only partially matched is split at the match point; the
// unmatched tail of the callchain (if any) becomes a new child node.
// `period` is added to children_period of every node passed on the way down
// and to `period` of the node where the callchain ends.
//
// An empty callchain is ignored: there is no first entry to match, and
// indexing callchain[0] would be undefined behaviour.
void CallChainRoot::AddCallChain(const std::vector<SampleEntry*>& callchain, uint64_t period) {
    if (callchain.empty()) {
        return;
    }
    children_period += period;
    CallChainNode* p = FindMatchingNode(children, callchain[0]);
    if (p == nullptr) {
        // No top-level node starts with this entry: the whole chain is new.
        std::unique_ptr<CallChainNode> new_node = AllocateNode(callchain, 0, period, 0);
        children.push_back(std::move(new_node));
        return;
    }
    size_t callchain_pos = 0;
    while (true) {
        size_t match_length = GetMatchingLengthInNode(p, callchain, callchain_pos);
        CHECK_GT(match_length, 0u);
        callchain_pos += match_length;
        bool find_child = true;
        if (match_length < p->chain.size()) {
            SplitNode(p, match_length);
            find_child = false;  // No need to find matching node in p->children.
        }
        if (callchain_pos == callchain.size()) {
            // The callchain ends exactly at this node.
            p->period += period;
            return;
        }
        p->children_period += period;
        if (find_child) {
            CallChainNode* np = FindMatchingNode(p->children, callchain[callchain_pos]);
            if (np != nullptr) {
                p = np;
                continue;
            }
        }
        // No child continues the chain: append the remainder as a new child.
        std::unique_ptr<CallChainNode> new_node = AllocateNode(callchain, callchain_pos, period, 0);
        p->children.push_back(std::move(new_node));
        break;
    }
}
Ejemplo n.º 3
0
/**
 * Build a root spill tree node from a dataset that is moved into the tree.
 *
 * The matrix is moved into a newly allocated MatType and localDataset is set
 * to true.  All column indices of the dataset are then handed to SplitNode().
 *
 * @param data Dataset to move into the tree.
 * @param tau Forwarded to SplitNode().
 * @param maxLeafSize Forwarded to SplitNode().
 * @param rho Forwarded to SplitNode().
 */
SpillTree<MetricType, StatisticType, MatType, HyperplaneType, SplitType>::
SpillTree(
    MatType&& data,
    const double tau,
    const size_t maxLeafSize,
    const double rho) :
    left(nullptr),
    right(nullptr),
    parent(nullptr),
    count(0),
    pointsIndex(nullptr),
    overlappingNode(false),
    hyperplane(),
    bound(data.n_rows),
    parentDistance(0), // The root has no parent, hence distance 0.
    dataset(new MatType(std::move(data))),
    localDataset(true)
{
  // Enumerate every point of the dataset: 0, 1, ..., n_cols - 1.  The vector
  // stays empty when the dataset holds no points.
  arma::Col<size_t> allIndices;
  const auto numPoints = dataset->n_cols;
  if (numPoints > 0)
  {
    allIndices = arma::linspace<arma::Col<size_t>>(0, numPoints - 1,
        numPoints);
  }

  // Split this node on the complete index set.
  SplitNode(allIndices, maxLeafSize, tau, rho);

  // The statistic is built last, from the finished node.
  stat = StatisticType(*this);
}
Ejemplo n.º 4
0
void TreeCommon::SplitNodeRecursively(int node_id, float std_dev_threshold) {
    queue<CNode*> node_queue;
	node_queue.push(this->GetNode(node_id));
	while (node_queue.size() > 0) {
		CNode* temp_node = node_queue.front();
		node_queue.pop();

		if (temp_node->type() == CNode::BRANCH) {
			CBranch* branch = dynamic_cast<CBranch*>(temp_node);
			if (branch == NULL || branch->level() > MAX_ALLOWED_LEVEL) continue;

			float accu_std_dev = 0.0;
			for (int i = 0; i < point_dataset_->var_num; ++i) {
				accu_std_dev += branch->std_deviations[i] * point_dataset_->var_weights[i];
			}
            if ((branch == root_ || accu_std_dev > std_dev_threshold) && branch->point_count > 10) {
                SplitNode(branch);

                vector<vector<float>> center_pos;
                for (int i = 0; i < branch->linked_nodes.size(); ++i)
                    center_pos.push_back(branch->linked_nodes[i]->center_pos);
                branch->average_dis = Utility::GetAverageDistance(center_pos);

                if (branch->level() + 1 > this->max_level_) this->max_level_ = branch->level() + 1;

                for (int i = 0; i < branch->linked_nodes.size(); ++i) {
                    id_node_map_.insert(map<int, CNode*>::value_type(branch->linked_nodes[i]->id(), branch->linked_nodes[i]));
                }
            }

			for (int i = 0; i < branch->linked_nodes.size(); ++i)
				node_queue.push(branch->linked_nodes[i]);
		}
	}
}
Ejemplo n.º 5
0
void TreeCommon::SplitNodeOnce(int node_id) {
    CNode* node = this->GetNode(node_id);
    if (node == NULL || node->type() != CNode::BRANCH) {
        cout << "Error Split Node: " << node_id << endl;
        return;
    }
    CBranch* branch = (CBranch*)node;

	bool is_children_all_leaf = true;
	for (int i = 0; i < branch->linked_nodes.size(); ++i)
		if (branch->linked_nodes[i]->type() != CNode::LEAF) {
			is_children_all_leaf = false;
			break;
		}
    if (is_children_all_leaf && branch->linked_nodes.size() > 2) {
        SplitNode(branch);

        vector<vector<float>> center_pos;
        for (int i = 0; i < branch->linked_nodes.size(); ++i)
            center_pos.push_back(branch->linked_nodes[i]->center_pos);
        branch->average_dis = Utility::GetAverageDistance(center_pos);

        if (branch->level() + 1 > this->max_level_) this->max_level_ = branch->level() + 1;

        for (int i = 0; i < branch->linked_nodes.size(); ++i) {
            id_node_map_.insert(map<int, CNode*>::value_type(branch->linked_nodes[i]->id(), branch->linked_nodes[i]));
        }
    }
}
Ejemplo n.º 6
0
/**
 * Construct a tree node over `count` points starting at `begin`, using a
 * caller-supplied splitter and recording the old-from-new mapping.
 *
 * @param data Dataset handed to SplitNode().
 * @param begin Index of the first point held by this node.
 * @param count Number of points held by this node.
 * @param oldFromNew Mapping handed to SplitNode(); must have one entry per
 *     column of `data`.
 * @param splitter Splitter instance forwarded to SplitNode().
 * @param parent Parent node stored in this node.
 * @param maxLeafSize Forwarded to SplitNode().
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    std::vector<size_t>& oldFromNew,
    SplitType& splitter,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(nullptr),
    right(nullptr),
    parent(parent),
    begin(begin),
    count(count),
    bound(data.n_rows),
    dataset(data)
{
  // The mapping comes from the caller; all that can be verified cheaply is
  // that it has one entry per point.
  assert(data.n_cols == oldFromNew.size());

  // Split this node with the supplied splitter.
  SplitNode(data, oldFromNew, maxLeafSize, splitter);

  // The statistic is built last, from the finished node.
  stat = StatisticType(*this);
}
/**
 * Construct the root of a tree over all points of `data`, producing both
 * index mappings.
 *
 * @param data Dataset handed to SplitNode().
 * @param oldFromNew Reset to the identity mapping, then handed to SplitNode().
 * @param newFromOld Filled as the inverse of oldFromNew after the split.
 * @param maxLeafSize Maximum leaf size stored in this node.
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    std::vector<size_t>& oldFromNew,
    std::vector<size_t>& newFromOld,
    const size_t maxLeafSize) :
    left(nullptr),
    right(nullptr),
    parent(nullptr),
    begin(0),
    count(data.n_cols),
    maxLeafSize(maxLeafSize),
    bound(data.n_rows),
    parentDistance(0), // The root has no parent, hence distance 0.
    dataset(data)
{
  // Start from the identity mapping: point k is at position k.
  oldFromNew.resize(data.n_cols);
  for (size_t point = 0; point < oldFromNew.size(); ++point)
  {
    oldFromNew[point] = point;
  }

  // Split this node.
  SplitNode(data, oldFromNew);

  // The statistic is built from the finished node.
  stat = StatisticType(*this);

  // Invert oldFromNew to obtain newFromOld.
  newFromOld.resize(data.n_cols);
  for (size_t newIndex = 0; newIndex < newFromOld.size(); ++newIndex)
  {
    newFromOld[oldFromNew[newIndex]] = newIndex;
  }
}
Ejemplo n.º 8
0
/* Entry point for decision tree construction.
 *
 * Starts the recursive split at `root` over the first `n` rows of `data`,
 * beginning at row 0 and tree level 0.
 */
void BuildTree(Node *root, double **data, int n) {
    const int first_row = 0;
    const int root_level = 0;

    SplitNode(root, data, n, first_row, root_level);
}
Ejemplo n.º 9
0
	// Adds branch `br` to `node`.
	//
	// While the node holds fewer than M branches the branch is appended and
	// false is returned.  Otherwise the node is split through SplitNode(),
	// `new_node` receives the second node and true is returned.
	bool AddBranch(RtreeBranch *br, RtreeNode *node, Node *new_node)
	{
		const bool has_room = node->count < M;
		if (!has_room)
		{
			// A node without room must hold exactly M branches.
			assert(node->count == M);
			SplitNode(node, br, new_node);
			// All M old branches plus the new one end up in the two nodes.
			assert(node->count + (*new_node)->count == M + 1);
			return true;
		}

		node->branch[node->count++] = *br;
		return false;
	}
Ejemplo n.º 10
0
/* Inserts `key` into the subtree rooted at `p`.
 *
 * Descends from `p` towards a leaf.  Before stepping into a child that
 * NeedSplit() reports, the child is split through SplitNode() and the
 * position of `key` in the current node is looked up again.  The key is
 * finally stored with InsertKeyToLeaf() once a leaf is reached.
 */
void NormalInsert(pBtree p,int key)
{
	for(;;)
	{
		int pos=Find(p->Key,1,p->Sum,key);

		if(IsLeaf(p))
		{
			InsertKeyToLeaf(p,key,pos);
			return;
		}

		if(NeedSplit(p->Child[pos-1]))
		{
			/* The split changes the keys of p, so look the position up again. */
			SplitNode(p,pos);
			pos=Find(p->Key,1,p->Sum,key);
		}

		/* Continue in the chosen child. */
		p=p->Child[pos-1];
	}
}
Ejemplo n.º 11
0
// Adds a branch to a node.
//
// Returns false when the branch fitted into `a_node`.  Returns true when the
// node was already at MAXNODES and had to be split; in that case SplitNode()
// is given `a_newNode` to report the second node.
RTREE_TEMPLATE
bool RTREE_QUAL::AddBranch(const Branch* a_branch, Node* a_node, Node** a_newNode) {
    ASSERT(a_branch);
    ASSERT(a_node);

    const bool mustSplit = !(a_node->m_count < MAXNODES);
    if (mustSplit) {
        // The output slot is only required on the split path.
        ASSERT(a_newNode);

        SplitNode(a_node, a_branch, a_newNode);
        return true;
    }

    // Room left: append the branch in place.
    a_node->m_branch[a_node->m_count] = *a_branch;
    ++a_node->m_count;
    return false;
}
Ejemplo n.º 12
0
/**
 * Construct the root of a tree over all points of `data`, without recording
 * any index mapping.
 *
 * @param data Dataset handed to SplitNode().
 * @param maxLeafSize Forwarded to SplitNode().
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t maxLeafSize) :
    left(nullptr),
    right(nullptr),
    parent(nullptr),
    begin(0), // The root begins at the first point...
    count(data.n_cols), // ...and covers every point of the dataset.
    bound(data.n_rows),
    parentDistance(0), // The root has no parent, hence distance 0.
    dataset(data)
{
  // Split this node.
  SplitNode(data, maxLeafSize);

  // The statistic is built last, from the finished node.
  stat = StatisticType(*this);
}
Ejemplo n.º 13
0
// Walks the stack of node pointers from the top down, enlarging node extents
// and splitting full nodes where a pending new node has to be attached.
//
// `newNode` is the node still waiting to be attached to the next node taken
// from the stack, or NULL when there is none.  If a node is still pending
// once the stack is empty, a new root is allocated above the old one.
void OpsRTree::AdjustRTree(RTreeNode *newNode)
{
    // start with the node on top of the stack

    RTreeNode *node = m_nodePtrStack.Top();
    assert(node != NULL);
    m_nodePtrStack.Pop();

    // visit the remaining nodes on the stack one by one

    for (; !m_nodePtrStack.IsEmpty(); m_nodePtrStack.Pop()) {
        RTreeNode *parent = m_nodePtrStack.Top();

        if (newNode != NULL) {
            if (parent->IsNodeFull()) {
                // no room in the parent: split it, which yields a new
                // pending node for the next iteration
                newNode = SplitNode(parent, newNode, &newNode->m_nodeExtent);
            }
            else {
                // room in the parent: attach the pending node here
                parent->m_nodeExtent.UnionWith(&node->m_nodeExtent);
                parent->AddChild(newNode);
                parent->m_nodeExtent.UnionWith(&newNode->m_nodeExtent);
                newNode = NULL;
            }
        }
        else {
            // nothing pending: only enlarge the parent's extent
            parent->m_nodeExtent.UnionWith(&node->m_nodeExtent);
        }

        node = parent;
    }

    // nothing pending means the root is unchanged

    if (newNode == NULL)
        return;

    // otherwise create a new root holding the old root and the pending
    // node, which makes the tree one level higher

    assert(node == m_rootNode);
    m_rootNode = m_nodeAllocator.Allocate();
    m_rootNode->Initialize(node->m_nodeLevel + 1);
    m_rootNode->m_child[0] = node;
    m_rootNode->m_child[1] = newNode;
    m_rootNode->m_nodeExtent = node->m_nodeExtent;
    m_rootNode->m_nodeExtent.UnionWith(&newNode->m_nodeExtent);
    ++m_rTreeHeight;

} // end: AdjustRTree()
Ejemplo n.º 14
0
// Creates a new static root node covering the rectangle described by
// `location`, `width` and `height`, gives it all `objects`, and subdivides it
// with SplitNode() starting at step 0.
//
// The root stores the `location` pointer itself, not a copy of the point.
void QuadTree::BuildStaticTree( vector<GameObject*> objects, Point3* location, float width, float height )
{
	rootStatic = new MyNode();

	// The root starts with every object and no wall points.
	rootStatic->myObjects.assign(objects.begin(), objects.end());
	rootStatic->wallPoints.clear();

	// Area covered by the root.
	rootStatic->position = location;
	rootStatic->width = width;
	rootStatic->height = height;

	SplitNode(rootStatic, 0);
}
Ejemplo n.º 15
0
/**
 * Construct a tree node over `count` points starting at `begin`, without
 * recording any index mapping.
 *
 * @param data Dataset handed to SplitNode().
 * @param begin Index of the first point held by this node.
 * @param count Number of points held by this node.
 * @param parent Parent node stored in this node.
 * @param maxLeafSize Forwarded to SplitNode().
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(nullptr),
    right(nullptr),
    parent(parent),
    begin(begin),
    count(count),
    bound(data.n_rows),
    dataset(data)
{
  // Split this node.
  SplitNode(data, maxLeafSize);

  // The statistic is built last, from the finished node.
  stat = StatisticType(*this);
}
Ejemplo n.º 16
0
/* Inserts `value` into the tree pointed to by `*tree`.
 *
 * An empty tree gets a new root from CreateNode().  Otherwise the tree is
 * descended from the root; every node met with order == 4 is passed to
 * SplitNode() before it is searched.  The value is stored in the first node
 * on the path that has no child at the chosen position.
 *
 * Returns 1 when the value was inserted (the global `elements` counter is
 * incremented) and 0 when the value was already present.
 */
int Insert(node **tree, int value) {
    int slot, shift;
    node *cur = *tree;
    node *above = NULL;

    if (cur == NULL) {
        *tree = CreateNode(value, NULL, NULL);
        elements++;
        return 1;
    }

    for (;;) {
        if (cur->order == 4)
            cur = SplitNode(cur, above);

        /* Find the first item greater than value; stop on a duplicate. */
        for (slot = 0; slot < cur->order - 1 && value >= cur->item[slot]; slot++) {
            if (value == cur->item[slot])
                return 0;
        }

        if (cur->child[slot] == NULL) {
            /* No child here: shift the larger items right and store value. */
            for (shift = cur->order - 1; shift > slot; shift--)
                cur->item[shift] = cur->item[shift - 1];
            cur->item[slot] = value;
            cur->order++;

            elements++;
            return 1;
        }

        /* Otherwise continue in the child. */
        above = cur;
        cur = cur->child[slot];
    }
}
Ejemplo n.º 17
0
// Inserts `item`, whose extent is `itemExtent`, into the node that
// ChooseNode() selects for the given level, then lets AdjustRTree() fix up
// the tree, passing it the node produced by a split (or NULL).
void OpsRTree::Insert(void *item, const OpsFloatExtent *itemExtent, int level)
{
    // pick the node that will receive the item

    RTreeNode *node = ChooseNode(itemExtent, level);

    // store the item directly when there is room, otherwise split the node;
    // newNode stays NULL unless a split took place

    RTreeNode *newNode = NULL;

    if (!node->IsNodeFull()) {
        node->AddChild(item);
        node->m_nodeExtent.UnionWith(itemExtent);
    }
    else {
        newNode = SplitNode(node, item, itemExtent);
    }

    // fix up the rest of the tree

    AdjustRTree(newNode);

} // end: Insert()
Ejemplo n.º 18
0
/**
 * Build a child spill tree node holding the given points of the parent's
 * dataset.
 *
 * The node points at the parent's dataset and sets localDataset to false.
 *
 * @param parent Parent node; its Dataset() is used, so it must not be null.
 * @param points Indices of the points to be held, forwarded to SplitNode().
 * @param tau Forwarded to SplitNode().
 * @param maxLeafSize Forwarded to SplitNode().
 * @param rho Forwarded to SplitNode().
 */
SpillTree<MetricType, StatisticType, MatType, HyperplaneType, SplitType>::
SpillTree(
    SpillTree* parent,
    arma::Col<size_t>& points,
    const double tau,
    const size_t maxLeafSize,
    const double rho) :
    left(nullptr),
    right(nullptr),
    parent(parent),
    count(0),
    pointsIndex(nullptr),
    overlappingNode(false),
    hyperplane(),
    bound(parent->Dataset().n_rows),
    dataset(&parent->Dataset()), // Use the parent's dataset directly.
    localDataset(false)
{
  // Split this node on the supplied points.
  SplitNode(points, maxLeafSize, tau, rho);

  // The statistic is built last, from the finished node.
  stat = StatisticType(*this);
}
Ejemplo n.º 19
0
// Recursively subdivides node `n` into four quadrants (tl, tr, bl, br).
//
// Recursion stops when `steps` exceeds 4 or the node holds fewer than
// TOLERANCE objects.  Such a node records in hasBall whether any of its
// objects is of type BALL; hasBall is cleared first so that a node without
// objects ends up with a defined value.
//
// Otherwise four children of half the width and height are created.  They
// inherit the parent's wallPoints and hasWall, and every object of the parent
// is copied into the quadrant that contains its position (objects stay in the
// parent as well).  The bottom-left child reuses the parent's position
// pointer; the other three children get newly allocated Point3 objects.
void QuadTree::SplitNode( MyNode* n, int steps )
{
	if (steps>4 || n->myObjects.size() < TOLERANCE)
	{
		n->hasBall = false;
		for (size_t i = 0; i < n->myObjects.size(); i++)
		{
			if (n->myObjects[i]->objectType == GameObject::type::BALL)
			{
				n->hasBall = true;
				break;
			}
		}
		return;
	}

	n->tl = new MyNode();
	n->tr = new MyNode();
	n->bl = new MyNode();
	n->br = new MyNode();

	float w = n->width/2.0, h = n->height/2.0;

	n->tl->width = n->tr->width = n->bl->width = n->br->width = w;
	n->tl->height = n->tr->height = n->bl->height = n->br->height = h;

	// Each child's position is the lower-left corner of its quadrant.
	n->bl->position = n->position;
	n->br->position = new Point3(n->position->x+w, n->position->y, n->position->z);

	n->tl->position = new Point3(n->position->x, n->position->y+h, n->position->z);
	n->tr->position = new Point3(n->position->x+w, n->position->y+h, n->position->z);

	n->tl->myObjects = vector<GameObject*>();
	n->tr->myObjects = vector<GameObject*>();
	n->bl->myObjects = vector<GameObject*>();
	n->br->myObjects = vector<GameObject*>();

	n->tl->wallPoints = n->wallPoints;
	n->tr->wallPoints = n->wallPoints;
	n->bl->wallPoints = n->wallPoints;
	n->br->wallPoints = n->wallPoints;

	n->tl->hasWall = n->hasWall;
	n->tr->hasWall = n->hasWall;
	n->bl->hasWall = n->hasWall;
	n->br->hasWall = n->hasWall;

	// tr->position is the centre of the parent, so comparing against it (and
	// against tl->position for y) selects the quadrant.
	for (size_t i = 0; i < n->myObjects.size(); i++)
	{
		GameObject* obj = n->myObjects[i];
		if (obj->position.x >= n->tr->position->x)
		{
			if (obj->position.y >= n->tr->position->y)
			{
				n->tr->myObjects.push_back(obj);
			}
			else
			{
				n->br->myObjects.push_back(obj);
			}
		}
		else
		{
			if (obj->position.y >= n->tl->position->y)
			{
				n->tl->myObjects.push_back(obj);
			}
			else
			{
				n->bl->myObjects.push_back(obj);
			}
		}
	}
	// for now don't remove objects from parent nodes

	SplitNode(n->tl, steps+1);

	SplitNode(n->tr, steps+1);

	SplitNode(n->bl, steps+1);

	SplitNode(n->br, steps+1);
}
Ejemplo n.º 20
0
//Top-down clustering driver: the root is split once, then the loop below runs
//until numLeaves reaches numMotifs. Each pass
//  1. repeats an assign/update cycle (at most CYCLE_MAX times per pass, and
//     only while total_t < MAX_T): nodes are reset, every input motif is added
//     to the node picked by InorderFindWinner, and the node models are adjusted;
//  2. computes and prints similarity/homogeneity figures;
//  3. splits the node picked by InorderFindNodeToSplit.
//curr is only passed to InorderReset; every other traversal starts at root.
//NOTE(review): winner and node2Split are dereferenced without a NULL check -
//confirm that the Inorder* helpers always set them.
void TopDownHClust::BuildNodes(TreeNode* curr)
{
	int j;
	Child* c; 
	int z,b;
	double winningScore=0, winningPVal=0, lowestIntSim, lastNLNZ=0;
	TreeNode* winner=NULL;
	TreeNode* lowSimNode=NULL;
	char tmpName[STR_LEN];
	int mi1, mi2; double pS; bool mforward1, mforward2;
printf("Building TDHC\n");
	t=0;
	//Starts with a single node... split it 
	SplitNode(root);
	
	while(numLeaves<numMotifs){printf("***%d Leaves\n", numLeaves);
		t=0;
		//Do CYCLE_MAX times
		do{
			//Reset the nodes
			InorderReset(curr);
			//Assign input motifs to the most similar nodes
			for(int x=0; x<numMotifs; x++)
			{
				//Start each search from very low sentinel values
				winningScore=-100000; winningPVal=-100000; winner=NULL;
				InorderFindWinner(root, motifSet[x], winner, mi1, mi2, mforward1, mforward2, winningScore, winningPVal);
				//add the motif to the winning node
				if(winner->members==0){
					//First member: replace any previous alignment with a new one holding just this motif
					if(winner->alignment!=NULL)
						delete winner->alignment;
					winner->alignment = new MultiAlignRec(1, motifSet[x]->GetLen());
					strcpy(winner->alignment->alignedNames[0], motifSet[x]->name);
					strcpy(winner->alignment->profileAlignment[0]->name, motifSet[x]->name);
					winner->alignment->alignedIDs[0] = x; 
					winner->members=1;
					winner->avgPval=winningPVal;
					//initialise the alignment
					for(z=0; z<motifSet[x]->GetLen(); z++)
						for(b=0; b<B; b++)
							winner->alignment->profileAlignment[0]->f[z][b]=motifSet[x]->f[z][b];
				}else{
					//Add a motif to an existing alignment
					//NOTE(review): this branch reads Plat->inputMotifs[x] while the rest of the loop uses motifSet[x] - confirm both refer to the same motif
					winner->alignment = MAman->SingleProfileAddition(winner->alignment, Plat->inputMotifs[x], x);
					winner->members++;
					//Running mean of the p-value over the members of the node
					winner->avgPval = (winner->avgPval*(((double)winner->members-1)/(double)winner->members))+(winningPVal*(1/(double)winner->members));
				}
				//Prepend the motif to the winner's progeny list
				Child* tmp=new Child();
				tmp->next = winner->progeny;
				tmp->m=motifSet[x];
				tmp->mID=x;
				winner->progeny = tmp;
			}
			//Update nodes based on current contents
			InorderAdjustModels(root);
			t++;
			total_t++;	
		}while(t<CYCLE_MAX && total_t<MAX_T);
       
			
		//Calculate (& print) internal homogeneities
		numLeavesNonZero=0;lowestIntSim = 10000;
		InorderCalcIntSim(root, numLeavesNonZero, lowestIntSim, lowSimNode);
		printf("nonZero: %.0lf\tLowestIntSim: %lf\n", numLeavesNonZero, lowestIntSim);
		double totalH=0;
		InorderCalcIntHomogeneity(root, totalH); totalH=totalH/numLeavesNonZero;
		printf("Family-level Homogeneity: %lf\n", totalH);
		
		//Calculate (& print) inter-cluster distances
		double highestSim=0;
		InorderFindMostSimilarClusters(root, highestSim);
		printf("Maximum inter-cluster distance: %lf\n", highestSim);
		
		//Split the node with the lowest homogeneity
		TreeNode* node2Split=NULL;double lowestIntPSim =10000;printf("Nodes:\t");
		InorderFindNodeToSplit(root, lowestIntPSim, node2Split);
		printf("\nNode2Split: %d\tLowestIntPSim: %lf\n\n", node2Split->nodeID, lowestIntPSim);
		SplitNode(node2Split);

	}
  
}
Ejemplo n.º 21
0
void SplitNode(Node *node, double **data, int n, int first, int level) {

/* Creates two branches of the decision tree on the array data. End condition
 * creates leaf if the purity of the node is small or if there are few
 * samples on the branch of node
 *
 * node  = pointer to node in decision tree
 * data  = table of unsorted data with features and labels (with last
 *         column as the label (data[i][d-1])); data[i][D] is read as the
 *         weight of row i
 * n     = length of table (# of rows/samples) on branch of node
 * first = first index of samples on branch of node
 * level = the depth of node in the tree
 *
 * The node is left as a leaf (left == right == NULL, index == -1) when the
 * branch has fewer than min_points rows, level equals max_level, all rows
 * share one label sign, no column lowers the impurity, or the children
 * cannot be allocated.
 */
    
    timestamp_type sort_start, sort_stop, split_start, split_stop;
    double sort_time = 0.;
    double split_time = 0.;

    int max_level = 3;
    int min_points = 6;

    node->left = NULL;
    node->right = NULL;
    node->index = -1;

    //Get initial counts for positive/negative labels
    int i;
    int pos = 0;
    double pos_w = 0;//positive weight
    double tot = 0;//total weight
    for (i = 0; i < n; ++i) {
        tot += data[first+i][D];

        if (data[first+i][D-1] > 0){
            pos += 1;
            pos_w += data[first+i][D];
        }
    }
    int neg = n - pos;
    double neg_w = tot - pos_w;
    
    //Declare class for node in case of pruning on child
    if (pos_w > neg_w)
        node->label = 1;
    else if (pos_w < neg_w)
        node->label = -1;
    else if (node->parent)
        node->label = node->parent->label;
    else {
        //printf("Root node is evenly balanced.\n");
        node->label = 0;
    }

    //If branch is small or almost pure, make leaf
    if (n < min_points) {
        //printf("small branch: %d points\n", n, level);
        return;
    }
    else if (level == max_level) {
        //printf("leaf node: level = max\n");
        return;
    }
    else if (pos == 0 || neg == 0) {
        //printf("pure node\n");
        return;
    }

    int col;
    int row; //best row to split at for particular column/feature
    int localrow; //first + localrow = row; receives BestSplit which returns integer in [-1, n-1]
    double threshold; //best threshold to split at for column/feature
    double impurity; //impurity for best split in feature/column
    int bestcol = -1; //feature with best split
    int bestrow = first+n-1; //best row to split for best feature
    double bestthresh = 0.; //threshold split for best feature (data[bestrow][bestcol])
    double Pmin = GINI(pos_w, tot); //minimum impurity seen so far

    //Sort table. Then find best column/feature, threshold, and impurity
    for (col = 0; col < D-1; ++col) {
        //printf("\r%5d/%5d", col, D);
        //fflush(stdout);
        get_timestamp(&sort_start);
        Sort(data, first, first+n-1, col);
        get_timestamp(&sort_stop);
        get_timestamp(&split_start);
        localrow = WeightedBestSplit(data, n, first, col, pos_w, tot, &impurity);
        get_timestamp(&split_stop);
        sort_time += timestamp_diff_in_seconds(sort_start, sort_stop);
        split_time += timestamp_diff_in_seconds(split_start, split_stop);

        //A negative result is not a row of this branch: first + localrow
        //would address data[first-1], which is outside the branch (and
        //outside the table when first == 0). Skip the column.
        //NOTE(review): assumes -1 means "no usable split" - confirm against
        //WeightedBestSplit.
        if (localrow < 0)
            continue;

        row = first + localrow;
        threshold = data[row][col];

        //If current column has better impurity, save col, thresh, and Pmin
        if (impurity < Pmin) {
            bestcol = col;
            bestrow = row;
            bestthresh = threshold;
            Pmin = impurity;
        }
    }
    //printf("\r           \r");
    //printf("Sort  time: %f sec\nSplit time: %f sec\n", sort_time, split_time);

    //If splitting doesn't improve purity (best split is at the end) stop
    if (bestrow == first+n-1) {
        //printf("no improvement\n");
        return;
    }

    //Restore the row order of the winning column; later columns re-sorted it
    Sort(data, first, first+n-1, bestcol);

    //Create right and left children; without memory the node stays a leaf
    Node *l = malloc(sizeof(Node));
    Node *r = malloc(sizeof(Node));
    if (l == NULL || r == NULL) {
        free(l);
        free(r);
        return;
    }

    //For feature, threshold with best impurity, save to node attributes
    node->index = bestcol;
    node->threshold = bestthresh;

    printf("Best feature: %d, Best thresh: %f, Impurity: %f\n", node->index, node->threshold, Pmin);

    l->parent = node;
    r->parent = node;
    l->right = NULL;
    l->left = NULL;
    r->right = NULL;
    r->left = NULL;
    
    node->left = l;
    node->right = r;

    //Rows [first, bestrow] go left, the remaining rows go right
    int first_r = bestrow+1;
    int n_l = first_r - first;
    int n_r = n - n_l;

    //printf("LEFT\n");
    SplitNode(l, data, n_l, first, level+1);
    //printf("RIGHT\n");
    SplitNode(r, data, n_r, first_r, level+1);

    return;
}
Ejemplo n.º 22
0
/**********************************************************************
 *                   TABMAPIndexBlock::AddEntry()
 *
 * Recursively search the tree until we encounter the best leaf to
 * contain the specified object MBR and add the new entry to it.
 *
 * In the event that the selected leaf node would be full, then it will be
 * split and this split can propagate up to its parent, etc.
 *
 * If bAddInThisNodeOnly=TRUE, then the entry is added only locally and
 * we do not try to update the child node.  This is used when the parent 
 * of a node that is being split has to be updated.
 *
 * The child to descend into is the entry whose center is closest to the
 * center of the new object.  The squared distance is computed in double
 * precision: with 32-bit integer arithmetic it overflows for coordinate
 * differences of only a few tens of thousands of units, which made the
 * comparison unreliable.
 *
 * Returns 0 on success, -1 on error.
 **********************************************************************/
int     TABMAPIndexBlock::AddEntry(GInt32 nXMin, GInt32 nYMin,
                                   GInt32 nXMax, GInt32 nYMax,
                                   GInt32 nBlockPtr,
                                   GBool bAddInThisNodeOnly /*=FALSE*/)
{
    int i;
    GBool bFound = FALSE;

    if (m_eAccess != TABWrite && m_eAccess != TABReadWrite)
    {
        CPLError(CE_Failure, CPLE_AssertionFailed,
               "Failed adding index entry: File not opened for write access.");
        return -1;
    }

    /*-----------------------------------------------------------------
     * Update MBR now... even if we're going to split current node later.
     *----------------------------------------------------------------*/
    if (nXMin < m_nMinX)
        m_nMinX = nXMin;
    if (nXMax > m_nMaxX)
        m_nMaxX = nXMax;
    
    if (nYMin < m_nMinY)
        m_nMinY = nYMin;
    if (nYMax > m_nMaxY)
        m_nMaxY = nYMax;

    /*-----------------------------------------------------------------
     * Look for the best candidate to contain the new entry
     * __TODO__ For now we'll just look for the first entry that can 
     *          contain the MBR, but we could probably have a better
     *          search criteria to optimize the resulting tree
     *----------------------------------------------------------------*/

    /*-----------------------------------------------------------------
     * If bAddInThisNodeOnly=TRUE then we add the entry only locally
     * and do not need to look for the proper leaf to insert it.
     *----------------------------------------------------------------*/
    if (bAddInThisNodeOnly)
        bFound = TRUE;

    /*-----------------------------------------------------------------
     * First check if current child could be a valid candidate.
     *----------------------------------------------------------------*/
    if (!bFound &&
        m_poCurChild && (m_asEntries[m_nCurChildIndex].XMin <= nXMin &&
                         m_asEntries[m_nCurChildIndex].XMax >= nXMax &&
                         m_asEntries[m_nCurChildIndex].YMin <= nYMin &&
                         m_asEntries[m_nCurChildIndex].YMax >= nYMax ) )
    {

        bFound = TRUE;
    }

    /*-----------------------------------------------------------------
     * Scan all entries to find a valid candidate
     * We look for the entry whose center is the closest to the center
     * of the object to add.
     *----------------------------------------------------------------*/
    if (!bFound)
    {
        int nObjCenterX = (nXMin + nXMax)/2;
        int nObjCenterY = (nYMin + nYMax)/2;

        // Make sure blocks currently in memory are written to disk.
        if (m_poCurChild)
        {
            m_poCurChild->CommitToFile();
            delete m_poCurChild;
            m_poCurChild = NULL;
            m_nCurChildIndex = -1;
        }

        // Look for entry whose center is closest to center of new object
        int nBestCandidate = -1;
        double dfMinDist = 0.0;   // Only meaningful once nBestCandidate != -1

        for(i=0; i<m_numEntries; i++)
        {
            int nX = (m_asEntries[i].XMin + m_asEntries[i].XMax)/2;
            int nY = (m_asEntries[i].YMin + m_asEntries[i].YMax)/2;

            // Squared distance in double precision to avoid integer overflow.
            double dfDX = static_cast<double>(nX) - 
                          static_cast<double>(nObjCenterX);
            double dfDY = static_cast<double>(nY) - 
                          static_cast<double>(nObjCenterY);
            double dfDist = dfDX*dfDX + dfDY*dfDY;

            if (nBestCandidate==-1 || dfDist < dfMinDist)
            {
                nBestCandidate = i;
                dfMinDist = dfDist;
            }
        }
        
        if (nBestCandidate != -1)
        {
            // Try to load corresponding child... if it fails then we are
            // likely in a leaf node, so we'll add the new entry in the current
            // node.
            TABRawBinBlock *poBlock = NULL;

            // Prevent error message if referred block not committed yet.
            CPLPushErrorHandler(CPLQuietErrorHandler);

            if ((poBlock = TABCreateMAPBlockFromFile(m_fp, 
                                       m_asEntries[nBestCandidate].nBlockPtr,
                                       512, TRUE, TABReadWrite)) &&
                poBlock->GetBlockClass() == TABMAP_INDEX_BLOCK)
            {
                m_poCurChild = (TABMAPIndexBlock*)poBlock;
                poBlock = NULL;
                m_nCurChildIndex = nBestCandidate;
                m_poCurChild->SetParentRef(this);
                m_poCurChild->SetMAPBlockManagerRef(m_poBlockManagerRef);
                bFound = TRUE;
            }
                
            // A block that was loaded but is not an index block is discarded.
            if (poBlock)
                delete poBlock;
            
            CPLPopErrorHandler();
            CPLErrorReset();
        }
    }

    if (bFound && !bAddInThisNodeOnly)
    {
        /*-------------------------------------------------------------
         * Found a child leaf... pass the call to it.
         *------------------------------------------------------------*/
        if (m_poCurChild->AddEntry(nXMin, nYMin, nXMax, nYMax, nBlockPtr) != 0)
            return -1;
    }
    else
    {
        /*-------------------------------------------------------------
         * Found no child to store new object... we're likely at the leaf
         * level so we'll store new object in current node
         *------------------------------------------------------------*/

        /*-------------------------------------------------------------
         * First thing to do is make sure that there is room for a new
         * entry in this node, and to split it if necessary.
         *------------------------------------------------------------*/
        if (GetNumFreeEntries() < 1)
        {
            if (m_poParentRef == NULL)
            {
                /*-----------------------------------------------------
                 * Splitting the root node adds one level to the tree, so
                 * after splitting we just redirect the call to the new
                 * child that's just been created.
                 *----------------------------------------------------*/
                if (SplitRootNode((nXMin+nXMax)/2, (nYMin+nYMax)/2) != 0)
                    return -1;  // Error happened and has already been reported

                CPLAssert(m_poCurChild);
                return m_poCurChild->AddEntry(nXMin, nYMin, nXMax, nYMax,
                                              nBlockPtr, TRUE);
            }
            else
            {
                /*-----------------------------------------------------
                 * Splitting a regular node
                 *----------------------------------------------------*/
                if (SplitNode((nXMin+nXMax)/2, (nYMin+nYMax)/2) != 0)
                    return -1; 
            }
        }

        if (InsertEntry(nXMin, nYMin, nXMax, nYMax, nBlockPtr) != 0)
            return -1;
    }

    /*-----------------------------------------------------------------
     * Update current node MBR and the reference to it in our parent.
     *----------------------------------------------------------------*/
    RecomputeMBR();

    return 0;
}
Ejemplo n.º 23
0
/**
 * Grows this tree level by level from the given training samples.
 *
 * The root (nodes[0]) initially owns the index of every sample. Each pass of
 * the outer loop visits the nodes that existed at the start of the pass; a
 * node that has not been handled yet is either frozen as a leaf (when it sits
 * at maxDepth) or split into two children appended at the end of `nodes`.
 * Training stops after the first pass in which no node was split.
 *
 * On return numNodes / numLeafNodes hold the final counts, leafID lists the
 * indices of all leaf nodes, and the per-node sample index lists are cleared.
 *
 * @param samples      training samples; forwarded to SplitNode
 * @param meanShape    mean shape; forwarded to SplitNode
 * @param stages_      stage index: selects GlobalParams::numFeats / radius and
 *                     sets the probability p = 1 - 0.1 * stages_ of choosing a
 *                     CLASSIFICATION split over a REGRESSION split
 * @param landmarkID_  stored in landmarkID
 *
 * NOTE(review): `nodes` is assumed to be pre-sized large enough to hold every
 * node that can be created up to maxDepth -- confirm against the class setup.
 */
void Tree::Train(vector<Sample> &samples,
	const Mat_<double> &meanShape,
	int stages_,
	int landmarkID_
	) {
	// set parameters
	landmarkID = landmarkID_;
	numFeats = GlobalParams::numFeats[stages_];
	radioRadius = GlobalParams::radius[stages_];
	numNodes = 1;
	numLeafNodes = 1;

	// push the indices of all training samples into the root node
	const int numSamples = static_cast<int>(samples.size());
	for (int i = 0; i < numSamples; i++) {
		nodes[0].sample_idx.push_back(i);
	}

	// initialize the root
	nodes[0].isSplit = false;
	nodes[0].pNodeID = 0;
	nodes[0].depth = 1;
	nodes[0].cNodesID[0] = 0;
	nodes[0].cNodesID[1] = 0;
	nodes[0].isLeaf = true;
	nodes[0].threshold = 0;
	nodes[0].feat[0].x = 1;
	nodes[0].feat[0].y = 1;
	nodes[0].feat[1].x = 1;
	nodes[0].feat[1].y = 1;

	bool stop = false;
	int num_nodes = 1;
	int num_leafnodes = 1;
	double thresh;
	Point2d feat[2];

	// sample indices routed to the left / right child by SplitNode
	vector<int> lcID, rcID;
	lcID.reserve(nodes[0].sample_idx.size());
	rcID.reserve(nodes[0].sample_idx.size());
	while (!stop) {
		// only nodes that exist at the start of this pass are visited;
		// children created during the pass are handled by the next one
		const int num_nodes_iter = num_nodes;
		int num_split = 0;
		for (int n = 0; n < num_nodes_iter; n++) {
			if (nodes[n].isSplit) {
				// already handled in an earlier pass
				continue;
			}

			if (nodes[n].depth == maxDepth) {
				// depth limit reached: freeze this node as a leaf
				nodes[n].isSplit = true;
			}
			else {
				// Separate the training samples into a left and a right path.
				// In each internal node we randomly choose to either minimize
				// the binary entropy for classification (with probability p)
				// or the variance of facial point increments for regression
				// (with probability 1-p).
				RNG randonGenerator(getTickCount());
				double p = 1 - 0.1 * stages_;
				double val = randonGenerator.uniform(0.0, 1.0);
				if (val <= p) {
					SplitNode(CLASSIFICATION, samples, meanShape, nodes[n].sample_idx,
						thresh, feat, lcID, rcID);
				}
				else {
					SplitNode(REGRESSION, samples, meanShape, nodes[n].sample_idx,
						thresh, feat, lcID, rcID);
				}

				// set the threshold and feature for current node
				nodes[n].feat[0] = feat[0];
				nodes[n].feat[1] = feat[1];
				nodes[n].threshold = thresh;
				nodes[n].isSplit = true;
				nodes[n].isLeaf = false;
				nodes[n].cNodesID[0] = num_nodes;
				nodes[n].cNodesID[1] = num_nodes + 1;

				// add left and right child into the random tree
				nodes[num_nodes].sample_idx = lcID;
				nodes[num_nodes].isSplit = false;
				nodes[num_nodes].pNodeID = n;
				nodes[num_nodes].depth = nodes[n].depth + 1;
				nodes[num_nodes].cNodesID[0] = 0;
				nodes[num_nodes].cNodesID[1] = 0;
				nodes[num_nodes].isLeaf = true;

				nodes[num_nodes + 1].sample_idx = rcID;
				nodes[num_nodes + 1].isSplit = false;
				nodes[num_nodes + 1].pNodeID = n;
				nodes[num_nodes + 1].depth = nodes[n].depth + 1;
				nodes[num_nodes + 1].cNodesID[0] = 0;
				nodes[num_nodes + 1].cNodesID[1] = 0;
				nodes[num_nodes + 1].isLeaf = true;

				// one leaf was replaced by two: net gain of one leaf, two nodes
				num_split++;
				num_leafnodes++;
				num_nodes += 2;
			}
		}
		if (num_split == 0) {
			// nothing left to split: the tree is complete
			stop = true;
		}
		else {
			numNodes = num_nodes;
			numLeafNodes = num_leafnodes;
		}
	}

	// mark leaf nodes.
	// clear sample indices in each node
	leafID.clear();
	for (int i = 0; i < numNodes; i++) {
		nodes[i].sample_idx.clear();
		if (nodes[i].isLeaf) {
			leafID.push_back(i);
		}
	}
}
Ejemplo n.º 24
0
// *********************************************************************************** //
// Builds the subtree below curr in pre-order.
//  - A node with exactly two members is split and both halves are visited.
//  - A node with more than one member whose CalcAvgIntPairwise value is below
//    intSimThres is split, the two new leaves are trained for up to CYCLE_MAX
//    cycles (bounded globally by MAX_T), each leaf receives a progeny list built
//    from its alignment, and both halves are visited.
//  - Any other node is left untouched.
void SOTA::PreOrderBuildNodes(TreeNode* curr)
{
	// Exactly two members: split straight away, no training cycles needed.
	if(curr->members==2){
		SplitNode(curr);
		if(curr->left->members>0 && curr->right->members>0)
			numLeavesNonZero++;
		// (debug print of the split result, formerly guarded by !treeTesting, is disabled)

		//Recursion
		PreOrderBuildNodes(curr->left);
		PreOrderBuildNodes(curr->right);
		return;
	}

	// Nothing to do unless the node holds several members that are not yet similar enough.
	if(!(curr->members>1) || !(CalcAvgIntPairwise(curr)<intSimThres))
		return;

	SplitNode(curr);

	// Handed to InorderFindWinner on every call; their values are not used afterwards here.
	int mi1, mi2;
	bool mforward1, mforward2;

	//Parent now split... train the new leaves
	t=0;
	do{
		//Reset the nodes
		InorderReset(curr);

		//Find & update the winner for every child motif of the parent
		for(Child* kid=curr->progeny; kid!=NULL; kid=kid->next)
		{
			double bestScore=-100000;
			double bestPVal=-100000;
			TreeNode* winner=NULL;
			// NOTE(review): winner is dereferenced unconditionally below, so
			// InorderFindWinner is presumably expected to always set it -- confirm.
			InorderFindWinner(curr, motifSet[kid->mID], winner, mi1, mi2, mforward1, mforward2, bestScore, bestPVal);

			if(winner->members!=0){
				//Add a motif to an existing alignment and fold its p-value into the running average
				winner->alignment = MAman->SingleProfileAddition(winner->alignment, Plat->inputMotifs[kid->mID], kid->mID);
				winner->members++;
				const double memberCount = (double)winner->members;
				winner->avgPval = (winner->avgPval*((memberCount-1)/memberCount))+(bestPVal*(1/memberCount));
				continue;
			}

			//First motif for this node: replace whatever alignment it had with a single-profile one
			delete winner->alignment;	//deleting a null pointer is a no-op
			winner->alignment = new MultiAlignRec(1, motifSet[kid->mID]->GetLen());
			strcpy(winner->alignment->alignedNames[0], motifSet[kid->mID]->name);
			strcpy(winner->alignment->profileAlignment[0]->name, motifSet[kid->mID]->name);
			winner->alignment->alignedIDs[0] = kid->mID;
			winner->members=1;
			winner->avgPval=bestPVal;

			//initialise the alignment with the motif's own matrix
			for(int col=0; col<motifSet[kid->mID]->GetLen(); col++)
			{
				for(int base=0; base<B; base++)
					winner->alignment->profileAlignment[0]->f[col][base]=motifSet[kid->mID]->f[col][base];
			}
		}

		//Adjust the models (Neighbourhood update)
		InorderAdjustModels(curr);
		t++;
		total_t++;
	}while(t<CYCLE_MAX && total_t<MAX_T);

	//Add the children to each leaf (left first, then right), prepending one entry per aligned motif
	TreeNode* leaves[2] = { curr->left, curr->right };
	for(int side=0; side<2; side++)
	{
		TreeNode* leaf = leaves[side];
		for(int j=0; j<leaf->members; j++){
			Child* entry=new Child();
			entry->next = leaf->progeny;
			entry->m=motifSet[leaf->alignment->alignedIDs[j]];
			entry->mID=leaf->alignment->alignedIDs[j];
			leaf->progeny = entry;
		}
	}

	if(curr->left->members>0 && curr->right->members>0)
		numLeavesNonZero++;
	// (debug print of the split result, formerly guarded by !treeTesting, is disabled)

	//Recursion
	PreOrderBuildNodes(curr->left);
	PreOrderBuildNodes(curr->right);
}