/**
 * Construct a node over `count` points of `data` starting at column `begin`,
 * attached to `parent`, and build its subtree.
 *
 * The node is split with SplitNode(data, oldFromNew), which is handed the
 * old-from-new mapping so it can keep it up to date.  Afterwards newFromOld
 * is rebuilt as the inverse permutation of oldFromNew.
 *
 * @param data Dataset; bound dimensionality is taken from data.n_rows.
 * @param begin Index of the first point held by this node.
 * @param count Number of points held by this node.
 * @param oldFromNew Mapping new index -> old index.  Must already contain
 *     data.n_cols entries (checked with an assertion).
 * @param newFromOld Output; resized to data.n_cols and filled with the
 *     inverse of oldFromNew.
 * @param parent Parent node of this node.
 * @param maxLeafSize Stored in the node as the maximum leaf size.
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    std::vector<size_t>& oldFromNew,
    std::vector<size_t>& newFromOld,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    maxLeafSize(maxLeafSize),
    bound(data.n_rows),
    dataset(data)
{
  // Hopefully the vector is initialized correctly!  We can't check that
  // entirely but we can do a minor sanity check.  The remapping loop below
  // indexes oldFromNew for every column, so a shorter vector would be read
  // out of bounds.
  assert(oldFromNew.size() == data.n_cols);

  // Perform the actual splitting.
  SplitNode(data, oldFromNew);

  // Create the statistic depending on if we are a leaf or not.
  stat = StatisticType(*this);

  // Map the newFromOld indices correctly.
  newFromOld.resize(data.n_cols);
  for (size_t i = 0; i < data.n_cols; i++)
    newFromOld[oldFromNew[i]] = i;
}
void CallChainRoot::AddCallChain(const std::vector<SampleEntry*>& callchain, uint64_t period) { children_period += period; CallChainNode* p = FindMatchingNode(children, callchain[0]); if (p == nullptr) { std::unique_ptr<CallChainNode> new_node = AllocateNode(callchain, 0, period, 0); children.push_back(std::move(new_node)); return; } size_t callchain_pos = 0; while (true) { size_t match_length = GetMatchingLengthInNode(p, callchain, callchain_pos); CHECK_GT(match_length, 0u); callchain_pos += match_length; bool find_child = true; if (match_length < p->chain.size()) { SplitNode(p, match_length); find_child = false; // No need to find matching node in p->children. } if (callchain_pos == callchain.size()) { p->period += period; return; } p->children_period += period; if (find_child) { CallChainNode* np = FindMatchingNode(p->children, callchain[callchain_pos]); if (np != nullptr) { p = np; continue; } } std::unique_ptr<CallChainNode> new_node = AllocateNode(callchain, callchain_pos, period, 0); p->children.push_back(std::move(new_node)); break; } }
/**
 * Root constructor taking the dataset by rvalue reference.
 *
 * The matrix is moved into a newly allocated MatType that this node points
 * to, and localDataset is set to true.  The node is then split over the
 * indices of every column in the dataset.
 *
 * @param data Dataset to move from.
 * @param tau Forwarded to SplitNode().
 * @param maxLeafSize Forwarded to SplitNode().
 * @param rho Forwarded to SplitNode().
 */
SpillTree<MetricType, StatisticType, MatType, HyperplaneType, SplitType>::
SpillTree(
    MatType&& data,
    const double tau,
    const size_t maxLeafSize,
    const double rho) :
    left(NULL),
    right(NULL),
    parent(NULL),
    count(0),
    pointsIndex(NULL),
    overlappingNode(false),
    hyperplane(),
    bound(data.n_rows),
    parentDistance(0), // The root has no parent, so its parent distance is 0.
    dataset(new MatType(std::move(data))),
    localDataset(true)
{
  // Index list handed to the splitter: 0 .. (dataset->n_cols - 1).  It stays
  // empty when the dataset has no columns.
  arma::Col<size_t> allIndices;
  if (dataset->n_cols != 0)
  {
    allIndices = arma::linspace<arma::Col<size_t>>(0, dataset->n_cols - 1,
        dataset->n_cols);
  }

  // Build the subtree below this node.
  SplitNode(allIndices, maxLeafSize, tau, rho);

  // The statistic is constructed from the finished node.
  stat = StatisticType(*this);
}
void TreeCommon::SplitNodeRecursively(int node_id, float std_dev_threshold) { queue<CNode*> node_queue; node_queue.push(this->GetNode(node_id)); while (node_queue.size() > 0) { CNode* temp_node = node_queue.front(); node_queue.pop(); if (temp_node->type() == CNode::BRANCH) { CBranch* branch = dynamic_cast<CBranch*>(temp_node); if (branch == NULL || branch->level() > MAX_ALLOWED_LEVEL) continue; float accu_std_dev = 0.0; for (int i = 0; i < point_dataset_->var_num; ++i) { accu_std_dev += branch->std_deviations[i] * point_dataset_->var_weights[i]; } if ((branch == root_ || accu_std_dev > std_dev_threshold) && branch->point_count > 10) { SplitNode(branch); vector<vector<float>> center_pos; for (int i = 0; i < branch->linked_nodes.size(); ++i) center_pos.push_back(branch->linked_nodes[i]->center_pos); branch->average_dis = Utility::GetAverageDistance(center_pos); if (branch->level() + 1 > this->max_level_) this->max_level_ = branch->level() + 1; for (int i = 0; i < branch->linked_nodes.size(); ++i) { id_node_map_.insert(map<int, CNode*>::value_type(branch->linked_nodes[i]->id(), branch->linked_nodes[i])); } } for (int i = 0; i < branch->linked_nodes.size(); ++i) node_queue.push(branch->linked_nodes[i]); } } }
void TreeCommon::SplitNodeOnce(int node_id) { CNode* node = this->GetNode(node_id); if (node == NULL || node->type() != CNode::BRANCH) { cout << "Error Split Node: " << node_id << endl; return; } CBranch* branch = (CBranch*)node; bool is_children_all_leaf = true; for (int i = 0; i < branch->linked_nodes.size(); ++i) if (branch->linked_nodes[i]->type() != CNode::LEAF) { is_children_all_leaf = false; break; } if (is_children_all_leaf && branch->linked_nodes.size() > 2) { SplitNode(branch); vector<vector<float>> center_pos; for (int i = 0; i < branch->linked_nodes.size(); ++i) center_pos.push_back(branch->linked_nodes[i]->center_pos); branch->average_dis = Utility::GetAverageDistance(center_pos); if (branch->level() + 1 > this->max_level_) this->max_level_ = branch->level() + 1; for (int i = 0; i < branch->linked_nodes.size(); ++i) { id_node_map_.insert(map<int, CNode*>::value_type(branch->linked_nodes[i]->id(), branch->linked_nodes[i])); } } }
/**
 * Construct a node over `count` points of `data` starting at `begin`,
 * attached to `parent`, splitting with a caller-supplied splitter.
 *
 * @param data Dataset; bound dimensionality is taken from data.n_rows.
 * @param begin Index of the first point held by this node.
 * @param count Number of points held by this node.
 * @param oldFromNew Mapping passed through to SplitNode(); must hold exactly
 *     data.n_cols entries.
 * @param splitter Splitter instance passed through to SplitNode().
 * @param parent Parent node of this node.
 * @param maxLeafSize Maximum leaf size passed through to SplitNode().
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    std::vector<size_t>& oldFromNew,
    SplitType& splitter,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    bound(data.n_rows),
    dataset(data)
{
  // The contents of the mapping cannot be validated here, but its length
  // can: one entry per column is required.
  assert(oldFromNew.size() == data.n_cols);

  // Build the subtree below this node.
  SplitNode(data, oldFromNew, maxLeafSize, splitter);

  // The statistic is constructed from the finished node.
  stat = StatisticType(*this);
}
/**
 * Root constructor that also reports how points were reordered.
 *
 * oldFromNew starts as the identity permutation and is passed to
 * SplitNode(); newFromOld is then filled with its inverse.
 *
 * @param data Dataset; the root spans all data.n_cols points.
 * @param oldFromNew Output mapping new index -> old index.
 * @param newFromOld Output mapping old index -> new index.
 * @param maxLeafSize Stored in the node as the maximum leaf size.
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    std::vector<size_t>& oldFromNew,
    std::vector<size_t>& newFromOld,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(NULL),
    begin(0),
    count(data.n_cols),
    maxLeafSize(maxLeafSize),
    bound(data.n_rows),
    parentDistance(0), // The root has no parent, so its parent distance is 0.
    dataset(data)
{
  // Start from the identity permutation: nothing has been moved yet.
  oldFromNew.resize(data.n_cols);
  for (size_t col = 0; col != data.n_cols; ++col)
  {
    oldFromNew[col] = col;
  }

  // Build the subtree below this node.
  SplitNode(data, oldFromNew);

  // The statistic is constructed from the finished node.
  stat = StatisticType(*this);

  // Invert the permutation so callers can look up new positions by old index.
  newFromOld.resize(data.n_cols);
  for (size_t newIndex = 0; newIndex != data.n_cols; ++newIndex)
  {
    const size_t oldIndex = oldFromNew[newIndex];
    newFromOld[oldIndex] = newIndex;
  }
}
/*
 * Entry point for decision tree construction.
 *
 * Hands the whole table to SplitNode(), starting at row 0 and level 0.
 *
 * root = node that becomes the root of the tree
 * data = table of samples
 * n    = number of rows in data
 */
void BuildTree(Node *root, double **data, int n)
{
    const int first_row = 0;
    const int root_level = 0;

    SplitNode(root, data, n, first_row, root_level);
}
/*
 * Add branch `br` to `node`.
 *
 * While the node holds fewer than M branches the branch is appended and
 * false is returned.  Otherwise the node is split through SplitNode(), which
 * receives `new_node`, and true is returned.
 */
bool AddBranch(RtreeBranch *br, RtreeNode *node, Node *new_node)
{
    if (node->count >= M) {
        /* The node must be exactly full, never over-full. */
        assert(node->count == M);
        SplitNode(node, br, new_node);
        /* The M old branches plus the new one are spread over both nodes. */
        assert(node->count + (*new_node)->count == M + 1);
        return true;
    }

    /* Room left: store the branch in the next free slot. */
    node->branch[node->count] = *br;
    node->count++;
    return false;
}
/*
 * Insert `key` into the subtree rooted at `p`.
 *
 * In a leaf the key is inserted at the position reported by Find().  In an
 * inner node the child at that position is split first if NeedSplit() says
 * so, the position is looked up again, and the insertion recurses into the
 * chosen child.
 */
void NormalInsert(pBtree p, int key)
{
    int slot = Find(p->Key, 1, p->Sum, key);

    if (IsLeaf(p)) {
        InsertKeyToLeaf(p, key, slot);
        return;
    }

    if (NeedSplit(p->Child[slot - 1])) {
        SplitNode(p, slot);
        /* The split changed p's keys, so the position must be recomputed. */
        slot = Find(p->Key, 1, p->Sum, key);
    }

    NormalInsert(p->Child[slot - 1], key);
}
// Add a_branch to a_node.
// Returns false when the branch was stored in the node directly, and true
// when the node was already full and had to be split via SplitNode(), which
// receives a_newNode.
RTREE_TEMPLATE
bool RTREE_QUAL::AddBranch(const Branch* a_branch, Node* a_node, Node** a_newNode)
{
  ASSERT(a_branch);
  ASSERT(a_node);

  if (a_node->m_count >= MAXNODES)
  {
    // No free slot left: a split is required.
    ASSERT(a_newNode);
    SplitNode(a_node, a_branch, a_newNode);
    return true;
  }

  // Split won't be necessary: take the next free slot.
  a_node->m_branch[a_node->m_count++] = *a_branch;
  return false;
}
/**
 * Root constructor: build a tree over the whole dataset.
 *
 * @param data Dataset; the root starts at index 0 and spans all
 *     data.n_cols points.
 * @param maxLeafSize Maximum leaf size passed through to SplitNode().
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(NULL),
    begin(0),           // The root begins at the first point...
    count(data.n_cols), // ...and covers every point of the dataset.
    bound(data.n_rows),
    parentDistance(0),  // The root has no parent, so its parent distance is 0.
    dataset(data)
{
  // Build the subtree below this node.
  SplitNode(data, maxLeafSize);

  // The statistic is constructed from the finished node.
  stat = StatisticType(*this);
}
// Propagate an insertion up the path of nodes held on m_nodePtrStack.
//
// newNode is the node produced by a split one level below, or NULL when no
// split happened.  Walking towards the root, each ancestor either just grows
// its extent, adopts newNode as an extra child, or - when already full - is
// split itself, producing a new newNode for the next level.  If a split node
// is still pending once the stack is empty, a new root is allocated above the
// old root and the tree height grows by one.
void OpsRTree::AdjustRTree(RTreeNode *newNode)
{
    // The top of the stack is the node the insertion took place in.
    RTreeNode *node = m_nodePtrStack.Top();
    assert(node != NULL);
    m_nodePtrStack.Pop();

    // Visit the remaining ancestors, one stack entry per iteration.
    for (; !m_nodePtrStack.IsEmpty(); m_nodePtrStack.Pop()) {
        RTreeNode *parent = m_nodePtrStack.Top();

        if (newNode != NULL) {
            if (parent->IsNodeFull()) {
                // No room for the split-off node: split the parent as well.
                newNode = SplitNode(parent, newNode, &newNode->m_nodeExtent);
            }
            else {
                // Room available: absorb the split-off node here.
                parent->m_nodeExtent.UnionWith(&node->m_nodeExtent);
                parent->AddChild(newNode);
                parent->m_nodeExtent.UnionWith(&newNode->m_nodeExtent);
                newNode = NULL;
            }
        }
        else {
            // Nothing pending: only the extent needs to grow.
            parent->m_nodeExtent.UnionWith(&node->m_nodeExtent);
        }

        node = parent;
    }

    // No pending split means the root is unchanged.
    if (newNode == NULL)
        return;

    // The root itself was split: create a new root one level higher that
    // holds the old root and the split-off node.
    assert(node == m_rootNode);
    m_rootNode = m_nodeAllocator.Allocate();
    m_rootNode->Initialize(node->m_nodeLevel + 1);
    m_rootNode->m_child[0] = node;
    m_rootNode->m_nodeExtent = node->m_nodeExtent;
    m_rootNode->m_child[1] = newNode;
    m_rootNode->m_nodeExtent.UnionWith(&newNode->m_nodeExtent);
    m_rTreeHeight++;

} // end: AdjustRTree()
// Build the static quad tree.
//
// Allocates a new root node, copies every object pointer from `objects` into
// it, records the given size and position, and subdivides it with
// SplitNode() starting at step 0.
void QuadTree::BuildStaticTree( vector<GameObject*> objects, Point3* location, float width, float height )
{
    rootStatic = new MyNode();

    // Geometry of the root node; the position pointer is stored as given.
    rootStatic->position = location;
    rootStatic->width = width;
    rootStatic->height = height;

    // Start from empty containers, then add the objects one by one.
    rootStatic->wallPoints = vector<Point3*>();
    rootStatic->myObjects = vector<GameObject*>();
    for (size_t idx = 0; idx < objects.size(); ++idx)
    {
        rootStatic->myObjects.push_back(objects[idx]);
    }

    SplitNode(rootStatic, 0);
}
/**
 * Construct a node over `count` points of `data` starting at `begin`,
 * attached to `parent`, without tracking index mappings.
 *
 * @param data Dataset; bound dimensionality is taken from data.n_rows.
 * @param begin Index of the first point held by this node.
 * @param count Number of points held by this node.
 * @param parent Parent node of this node.
 * @param maxLeafSize Maximum leaf size passed through to SplitNode().
 */
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    bound(data.n_rows),
    dataset(data)
{
  // Build the subtree below this node.
  SplitNode(data, maxLeafSize);

  // The statistic is constructed from the finished node.
  stat = StatisticType(*this);
}
/*
 * Insert `value` into the tree at *tree.
 *
 * An empty tree gets a fresh node.  Otherwise the tree is descended from the
 * root; every node with order 4 met on the way is passed to SplitNode()
 * before it is examined.  The value is stored in the first node whose
 * matching child slot is empty.
 *
 * Returns 1 when the value was inserted (the global `elements` counter is
 * incremented), 0 when the value is already present.
 */
int Insert(node **tree, int value)
{
    node *cur = *tree;
    node *up = NULL;
    int slot, k;

    if (*tree == NULL) {
        *tree = CreateNode(value, NULL, NULL);
        elements++;
        return 1;
    }

    for (;;) {
        if (cur->order == 4)
            cur = SplitNode(cur, up);

        /* Find the first item greater than value; stop on a duplicate. */
        for (slot = 0; slot < (cur->order - 1) && value >= cur->item[slot]; slot++) {
            if (value == cur->item[slot])
                return 0;
        }

        if (cur->child[slot] == NULL) {
            /* Shift the larger items right and drop the value in place. */
            for (k = cur->order - 1; k > slot; k--)
                cur->item[k] = cur->item[k - 1];
            cur->item[slot] = value;
            cur->order++;
            elements++;
            return 1;
        }

        /* Descend one level. */
        up = cur;
        cur = cur->child[slot];
    }
}
void OpsRTree::Insert(void *item, const OpsFloatExtent *itemExtent, int level) { // choose the best node at the specified level to insert into RTreeNode *node = ChooseNode(itemExtent, level); // add the item to the node, splitting it, if it is already full RTreeNode *newNode; if (node->IsNodeFull()) newNode = SplitNode(node, item, itemExtent); else { newNode = NULL; node->AddChild(item); node->m_nodeExtent.UnionWith(itemExtent); } // adjust the nodes at the search path from the root AdjustRTree(newNode); } // end: Insert()
/**
 * Child constructor: build a node below `parent` over the given points.
 *
 * The node points at the parent's dataset (localDataset is false).
 *
 * @param parent Parent node; it is dereferenced, so it must not be NULL.
 * @param points Point indices passed through to SplitNode().
 * @param tau Forwarded to SplitNode().
 * @param maxLeafSize Forwarded to SplitNode().
 * @param rho Forwarded to SplitNode().
 */
SpillTree<MetricType, StatisticType, MatType, HyperplaneType, SplitType>::
SpillTree(
    SpillTree* parent,
    arma::Col<size_t>& points,
    const double tau,
    const size_t maxLeafSize,
    const double rho) :
    left(NULL),
    right(NULL),
    parent(parent),
    count(0),
    pointsIndex(NULL),
    overlappingNode(false),
    hyperplane(),
    bound(parent->Dataset().n_rows),
    dataset(&parent->Dataset()), // Share the parent's dataset.
    localDataset(false)
{
  // Build the subtree below this node.
  SplitNode(points, maxLeafSize, tau, rho);

  // The statistic is constructed from the finished node.
  stat = StatisticType(*this);
}
// Recursively subdivide node `n` into four quadrants.
//
// Recursion stops when `steps` exceeds 4 or the node holds fewer than
// TOLERANCE objects.  Such a leaf only gets its hasBall flag set: true when
// any of its objects is a BALL, false otherwise (including when it holds no
// objects at all).
//
// Otherwise four children (tl, tr, bl, br) of half the width and height are
// created.  They inherit the parent's wallPoints and hasWall, and every
// object of the parent is assigned to one child by comparing its position
// with the x/y origin of the top-right / top-left child.  Objects are not
// removed from the parent.
//
// n:     node to subdivide.
// steps: current recursion depth.
void QuadTree::SplitNode( MyNode* n, int steps )
{
    if (steps>4 || n->myObjects.size() < TOLERANCE)
    {
        // Reset first so a leaf without objects does not keep a stale value.
        n->hasBall = false;
        for (size_t i = 0; i < n->myObjects.size(); i++)
        {
            if (n->myObjects[i]->objectType == GameObject::type::BALL)
            {
                n->hasBall = true;
                break;
            }
        }
        return;
    }

    n->tl = new MyNode();
    n->tr = new MyNode();
    n->bl = new MyNode();
    n->br = new MyNode();

    float w = n->width/2.0, h = n->height/2.0;
    n->tl->width = n->tr->width = n->bl->width = n->br->width = w;
    n->tl->height = n->tr->height = n->bl->height = n->br->height = h;

    // NOTE(review): bl shares the parent's position pointer while the other
    // three children get freshly allocated points - confirm that whoever
    // releases these points accounts for the shared one.
    n->bl->position = n->position;
    n->br->position = new Point3(n->position->x+w, n->position->y, n->position->z);
    n->tl->position = new Point3(n->position->x, n->position->y+h, n->position->z);
    n->tr->position = new Point3(n->position->x+w, n->position->y+h, n->position->z);

    n->tl->myObjects = vector<GameObject*>();
    n->tr->myObjects = vector<GameObject*>();
    n->bl->myObjects = vector<GameObject*>();
    n->br->myObjects = vector<GameObject*>();

    n->tl->wallPoints = n->wallPoints;
    n->tr->wallPoints = n->wallPoints;
    n->bl->wallPoints = n->wallPoints;
    n->br->wallPoints = n->wallPoints;

    n->tl->hasWall = n->hasWall;
    n->tr->hasWall = n->hasWall;
    n->bl->hasWall = n->hasWall;
    n->br->hasWall = n->hasWall;

    // Distribute the objects: right half when x is at or beyond tr's x,
    // top half when y is at or beyond the corresponding top child's y.
    for (size_t i = 0; i < n->myObjects.size(); i++)
    {
        GameObject* obj = n->myObjects[i];
        if (obj->position.x >= n->tr->position->x)
        {
            if (obj->position.y >= n->tr->position->y)
            {
                n->tr->myObjects.push_back(obj);
            }
            else
            {
                n->br->myObjects.push_back(obj);
            }
        }
        else
        {
            if (obj->position.y >= n->tl->position->y)
            {
                n->tl->myObjects.push_back(obj);
            }
            else
            {
                n->bl->myObjects.push_back(obj);
            }
        }
    }

    // for now don't remove objects from parent nodes

    SplitNode(n->tl, steps+1);
    SplitNode(n->tr, steps+1);
    SplitNode(n->bl, steps+1);
    SplitNode(n->br, steps+1);
}
// Train 2 nodes based on all input set.
//
// Top-down hierarchical clustering driver.  The root is split once, then the
// following is repeated until numLeaves reaches numMotifs:
//   1. up to CYCLE_MAX training cycles (also bounded by MAX_T over all
//      calls via total_t): reset the nodes, assign every input motif to the
//      winner reported by InorderFindWinner(), then adjust the node models;
//   2. print internal similarity / homogeneity statistics;
//   3. split the node chosen by InorderFindNodeToSplit().
//
// NOTE(review): InorderReset() is called on `curr` while every other
// traversal starts from `root` - confirm callers always pass the root.
// NOTE(review): `winner` and `node2Split` are dereferenced without a NULL
// check; this relies on the Inorder* helpers always selecting a node.
// NOTE(review): j, c, lastNLNZ, tmpName and pS are declared but never used
// in this function.
void TopDownHClust::BuildNodes(TreeNode* curr)
{
	int j;
	Child* c;
	int z,b;
	double winningScore=0, winningPVal=0, lowestIntSim, lastNLNZ=0;
	TreeNode* winner=NULL;
	TreeNode* lowSimNode=NULL;
	char tmpName[STR_LEN];
	int mi1, mi2;
	double pS;
	bool mforward1, mforward2;

	printf("Building TDHC\n");
	t=0;
	//Starts with a single node... split it
	SplitNode(root);

	while(numLeaves<numMotifs){printf("***%d Leaves\n", numLeaves);
		t=0;
		//Do CYCLE_MAX times
		do{
			//Reset the nodes
			InorderReset(curr);

			//Assign input motifs to the most similar nodes
			for(int x=0; x<numMotifs; x++)
			{
				// Very low starting scores so any candidate reported by the
				// search can replace them.
				winningScore=-100000;
				winningPVal=-100000;
				winner=NULL;
				InorderFindWinner(root, motifSet[x], winner, mi1, mi2, mforward1, mforward2, winningScore, winningPVal);

				//add the motif to the winning node
				if(winner->members==0){
					// First member: start a fresh single-motif alignment.
					if(winner->alignment!=NULL)
						delete winner->alignment;
					winner->alignment = new MultiAlignRec(1, motifSet[x]->GetLen());
					strcpy(winner->alignment->alignedNames[0], motifSet[x]->name);
					strcpy(winner->alignment->profileAlignment[0]->name, motifSet[x]->name);
					winner->alignment->alignedIDs[0] = x;
					winner->members=1;
					winner->avgPval=winningPVal;
					//initialise the alignment
					for(z=0; z<motifSet[x]->GetLen(); z++)
						for(b=0; b<B; b++)
							winner->alignment->profileAlignment[0]->f[z][b]=motifSet[x]->f[z][b];
				}else{
					//Add a motif to an existing alignment
					winner->alignment = MAman->SingleProfileAddition(winner->alignment, Plat->inputMotifs[x], x);
					winner->members++;
					// Running mean of the winning p-values over all members.
					winner->avgPval = (winner->avgPval*(((double)winner->members-1)/(double)winner->members))+(winningPVal*(1/(double)winner->members));
				}
				// Prepend the motif to the winner's progeny list.
				Child* tmp=new Child();
				tmp->next = winner->progeny;
				tmp->m=motifSet[x];
				tmp->mID=x;
				winner->progeny = tmp;
			}

			//Update nodes based on current contents
			InorderAdjustModels(root);
			t++;
			total_t++;
		}while(t<CYCLE_MAX && total_t<MAX_T);

		//Calculate (& print) internal homogeneities
		numLeavesNonZero=0;lowestIntSim = 10000;
		InorderCalcIntSim(root, numLeavesNonZero, lowestIntSim, lowSimNode);
		printf("nonZero: %.0lf\tLowestIntSim: %lf\n", numLeavesNonZero, lowestIntSim);
		double totalH=0;
		InorderCalcIntHomogeneity(root, totalH);
		totalH=totalH/numLeavesNonZero;
		printf("Family-level Homogeneity: %lf\n", totalH);

		//Calculate (& print) inter-cluster distances
		double highestSim=0;
		InorderFindMostSimilarClusters(root, highestSim);
		printf("Maximum inter-cluster distance: %lf\n", highestSim);

		//Split the node with the lowest homogeneity
		TreeNode* node2Split=NULL;double lowestIntPSim =10000;printf("Nodes:\t");
		InorderFindNodeToSplit(root, lowestIntPSim, node2Split);
		printf("\nNode2Split: %d\tLowestIntPSim: %lf\n\n", node2Split->nodeID, lowestIntPSim);
		SplitNode(node2Split);
	}
}
/* Creates two branches of the decision tree on the array data. End condition
 * creates leaf if the purity of the node is small or if there are few
 * samples on the branch of node
 *
 * node  = pointer to node in decision tree
 * data  = table of unsorted data with features and labels; column D-1 is
 *         read as the label and column D as the sample weight
 * n     = length of table (# of rows/samples) on branch of node
 * first = first index of samples on branch of node
 * level = the depth of node in the tree
 *
 * The rows first .. first+n-1 of data are re-sorted in place.  On return
 * node->index is -1 for a leaf, otherwise the feature column used for the
 * split, with node->threshold holding the split value and node->left /
 * node->right the two children.  If the children cannot be allocated the
 * node is left as a leaf and a message is written to stderr.
 */
void SplitNode(Node *node, double **data, int n, int first, int level)
{
    timestamp_type sort_start, sort_stop, split_start, split_stop;
    double sort_time = 0.;
    double split_time = 0.;
    int max_level = 3;
    int min_points = 6;

    node->left = NULL;
    node->right = NULL;
    node->index = -1;

    /* Get initial counts for positive/negative labels */
    int i;
    int pos = 0;
    double pos_w = 0; /* positive weight */
    double tot = 0;   /* total weight */
    for (i = 0; i < n; ++i) {
        tot += data[first+i][D];
        if (data[first+i][D-1] > 0) {
            pos += 1;
            pos_w += data[first+i][D];
        }
    }
    int neg = n - pos;
    double neg_w = tot - pos_w;

    /* Declare class for node in case of pruning on child: the heavier class
     * wins, a tie inherits the parent's label, a tied root gets 0. */
    if (pos_w > neg_w)
        node->label = 1;
    else if (pos_w < neg_w)
        node->label = -1;
    else if (node->parent)
        node->label = node->parent->label;
    else
        node->label = 0;

    /* If branch is small, at maximum depth, or pure, make leaf */
    if (n < min_points)
        return;
    else if (level == max_level)
        return;
    else if (pos == 0 || neg == 0)
        return;

    int col;
    int row;          /* best row to split at for particular column/feature */
    int localrow;     /* first + localrow = row; receives the split position,
                         an integer in [-1, n-1] */
    double impurity;  /* impurity for best split in feature/column */
    int bestcol = -1;            /* feature with best split */
    int bestrow = first+n-1;     /* best row to split for best feature */
    double bestthresh = 0.;      /* data[bestrow][bestcol] once a split is found */
    double Pmin = GINI(pos_w, tot); /* minimum impurity seen so far */

    /* Sort table. Then find best column/feature, threshold, and impurity */
    for (col = 0; col < D-1; ++col) {
        get_timestamp(&sort_start);
        Sort(data, first, first+n-1, col);
        get_timestamp(&sort_stop);

        get_timestamp(&split_start);
        localrow = WeightedBestSplit(data, n, first, col, pos_w, tot, &impurity);
        get_timestamp(&split_stop);

        sort_time += timestamp_diff_in_seconds(sort_start, sort_stop);
        split_time += timestamp_diff_in_seconds(split_start, split_stop);

        /* A position of -1 names no row of this branch; reading
         * data[first-1] would be out of bounds when first == 0, so such a
         * result is never recorded and the table is only read for an
         * accepted split. */
        if (localrow < 0)
            continue;

        row = first + localrow;

        /* If current column has better impurity, save col, thresh, and Pmin */
        if (impurity < Pmin) {
            bestcol = col;
            bestrow = row;
            bestthresh = data[row][col];
            Pmin = impurity;
        }
    }

    /* If splitting doesn't improve purity (best split is at the end) stop */
    if (bestrow == first+n-1)
        return;

    /* Restore the ordering by the winning feature so that rows
     * first..bestrow go left and the rest go right. */
    Sort(data, first, first+n-1, bestcol);

    /* Create right and left children; stay a leaf if allocation fails. */
    Node *l = malloc(sizeof(Node));
    Node *r = malloc(sizeof(Node));
    if (l == NULL || r == NULL) {
        free(l);
        free(r);
        fprintf(stderr, "SplitNode: out of memory, keeping node as leaf\n");
        return;
    }

    /* For feature, threshold with best impurity, save to node attributes */
    node->index = bestcol;
    node->threshold = bestthresh;
    printf("Best feature: %d, Best thresh: %f, Impurity: %f\n", node->index, node->threshold, Pmin);

    l->parent = node;
    r->parent = node;
    l->right = NULL;
    l->left = NULL;
    r->right = NULL;
    r->left = NULL;
    node->left = l;
    node->right = r;

    int first_r = bestrow+1;
    int n_l = first_r - first;
    int n_r = n - n_l;

    SplitNode(l, data, n_l, first, level+1);
    SplitNode(r, data, n_r, first_r, level+1);
    return;
}
/********************************************************************** * TABMAPIndexBlock::AddEntry() * * Recursively search the tree until we encounter the best leaf to * contain the specified object MBR and add the new entry to it. * * In the even that the selected leaf node would be full, then it will be * split and this split can propagate up to its parent, etc. * * If bAddInThisNodeOnly=TRUE, then the entry is added only locally and * we do not try to update the child node. This is used when the parent * of a node that is being splitted has to be updated. * * Returns 0 on success, -1 on error. **********************************************************************/ int TABMAPIndexBlock::AddEntry(GInt32 nXMin, GInt32 nYMin, GInt32 nXMax, GInt32 nYMax, GInt32 nBlockPtr, GBool bAddInThisNodeOnly /*=FALSE*/) { int i; GBool bFound = FALSE; if (m_eAccess != TABWrite && m_eAccess != TABReadWrite) { CPLError(CE_Failure, CPLE_AssertionFailed, "Failed adding index entry: File not opened for write access."); return -1; } /*----------------------------------------------------------------- * Update MBR now... even if we're going to split current node later. *----------------------------------------------------------------*/ if (nXMin < m_nMinX) m_nMinX = nXMin; if (nXMax > m_nMaxX) m_nMaxX = nXMax; if (nYMin < m_nMinY) m_nMinY = nYMin; if (nYMax > m_nMaxY) m_nMaxY = nYMax; /*----------------------------------------------------------------- * Look for the best candidate to contain the new entry * __TODO__ For now we'll just look for the first entry that can * contain the MBR, but we could probably have a better * search criteria to optimize the resulting tree *----------------------------------------------------------------*/ /*----------------------------------------------------------------- * If bAddInThisNodeOnly=TRUE then we add the entry only locally * and do not need to look for the proper leaf to insert it. 
*----------------------------------------------------------------*/ if (bAddInThisNodeOnly) bFound = TRUE; /*----------------------------------------------------------------- * First check if current child could be a valid candidate. *----------------------------------------------------------------*/ if (!bFound && m_poCurChild && (m_asEntries[m_nCurChildIndex].XMin <= nXMin && m_asEntries[m_nCurChildIndex].XMax >= nXMax && m_asEntries[m_nCurChildIndex].YMin <= nYMin && m_asEntries[m_nCurChildIndex].YMax >= nYMax ) ) { bFound = TRUE; } /*----------------------------------------------------------------- * Scan all entries to find a valid candidate * We look for the entry whose center is the closest to the center * of the object to add. *----------------------------------------------------------------*/ if (!bFound) { int nObjCenterX = (nXMin + nXMax)/2; int nObjCenterY = (nYMin + nYMax)/2; // Make sure blocks currently in memory are written to disk. if (m_poCurChild) { m_poCurChild->CommitToFile(); delete m_poCurChild; m_poCurChild = NULL; m_nCurChildIndex = -1; } // Look for entry whose center is closest to center of new object int nBestCandidate = -1; int nMinDist = 2000000000; for(i=0; i<m_numEntries; i++) { int nX = (m_asEntries[i].XMin + m_asEntries[i].XMax)/2; int nY = (m_asEntries[i].YMin + m_asEntries[i].YMax)/2; int nDist = (nX-nObjCenterX)*(nX-nObjCenterX) + (nY-nObjCenterY)*(nY-nObjCenterY); if (nBestCandidate==-1 || nDist < nMinDist) { nBestCandidate = i; nMinDist = nDist; } } if (nBestCandidate != -1) { // Try to load corresponding child... if it fails then we are // likely in a leaf node, so we'll add the new entry in the current // node. TABRawBinBlock *poBlock = NULL; // Prevent error message if referred block not committed yet. 
CPLPushErrorHandler(CPLQuietErrorHandler); if ((poBlock = TABCreateMAPBlockFromFile(m_fp, m_asEntries[nBestCandidate].nBlockPtr, 512, TRUE, TABReadWrite)) && poBlock->GetBlockClass() == TABMAP_INDEX_BLOCK) { m_poCurChild = (TABMAPIndexBlock*)poBlock; poBlock = NULL; m_nCurChildIndex = nBestCandidate; m_poCurChild->SetParentRef(this); m_poCurChild->SetMAPBlockManagerRef(m_poBlockManagerRef); bFound = TRUE; } if (poBlock) delete poBlock; CPLPopErrorHandler(); CPLErrorReset(); } } if (bFound && !bAddInThisNodeOnly) { /*------------------------------------------------------------- * Found a child leaf... pass the call to it. *------------------------------------------------------------*/ if (m_poCurChild->AddEntry(nXMin, nYMin, nXMax, nYMax, nBlockPtr) != 0) return -1; } else { /*------------------------------------------------------------- * Found no child to store new object... we're likely at the leaf * level so we'll store new object in current node *------------------------------------------------------------*/ /*------------------------------------------------------------- * First thing to do is make sure that there is room for a new * entry in this node, and to split it if necessary. *------------------------------------------------------------*/ if (GetNumFreeEntries() < 1) { if (m_poParentRef == NULL) { /*----------------------------------------------------- * Splitting the root node adds one level to the tree, so * after splitting we just redirect the call to the new * child that's just been created. 
*----------------------------------------------------*/ if (SplitRootNode((nXMin+nXMax)/2, (nYMin+nYMax)/2) != 0) return -1; // Error happened and has already been reported CPLAssert(m_poCurChild); return m_poCurChild->AddEntry(nXMin, nYMin, nXMax, nYMax, nBlockPtr, TRUE); } else { /*----------------------------------------------------- * Splitting a regular node *----------------------------------------------------*/ if (SplitNode((nXMin+nXMax)/2, (nYMin+nYMax)/2) != 0) return -1; } } if (InsertEntry(nXMin, nYMin, nXMax, nYMax, nBlockPtr) != 0) return -1; } /*----------------------------------------------------------------- * Update current node MBR and the reference to it in our parent. *----------------------------------------------------------------*/ RecomputeMBR(); return 0; }
void Tree::Train(vector<Sample> &samples, const Mat_<double> &meanShape, int stages_, int landmarkID_ ) { // set parameters landmarkID = landmarkID_; numFeats = GlobalParams::numFeats[stages_]; radioRadius = GlobalParams::radius[stages_]; numNodes = 1; numLeafNodes = 1; // index: indicates the training samples id in training data set int num_nodes_iter; int num_split; for (int i = 0; i < samples.size(); i++) { // push the indies of training samples into root node nodes[0].sample_idx.push_back(i); } // initialize the root nodes[0].isSplit = false; nodes[0].pNodeID = 0; nodes[0].depth = 1; nodes[0].cNodesID[0] = 0; nodes[0].cNodesID[1] = 0; nodes[0].isLeaf = true; nodes[0].threshold = 0; nodes[0].feat[0].x = 1; nodes[0].feat[0].y = 1; nodes[0].feat[1].x = 1; nodes[0].feat[1].y = 1; bool stop = false; int num_nodes = 1; int num_leafnodes = 1; double thresh; Point2d feat[2]; vector<int> lcID, rcID; lcID.reserve(nodes[0].sample_idx.size()); rcID.reserve(nodes[0].sample_idx.size()); while (!stop) { num_nodes_iter = num_nodes; num_split = 0; for (int n = 0; n < num_nodes_iter; n++) { if (!nodes[n].isSplit) { if (nodes[n].depth == maxDepth) { nodes[n].isSplit = true; } } else { // separate the training samples into left and right path // splite the tree // In each internal node, we randomly choose to either minimize the // binary entropy for classification (with probablity p) // or the variance of ficial point increments for regression // (with probability 1-p) RNG randonGenerator(getTickCount()); double p = 1 - 0.1 * stages_; double val = randonGenerator.uniform(0.0, 1.0); if (val <= p) { SplitNode(CLASSIFICATION, samples, meanShape, nodes[n].sample_idx, thresh, feat, lcID, rcID); } else { SplitNode(REGRESSION, samples, meanShape, nodes[n].sample_idx, thresh, feat, lcID, rcID); } // set the threshold and feature for current node nodes[n].feat[0] = feat[0]; nodes[n].feat[1] = feat[1]; nodes[n].threshold = thresh; nodes[n].isSplit = true; nodes[n].isLeaf = false; 
nodes[n].cNodesID[0] = num_nodes; nodes[n].cNodesID[1] = num_nodes + 1; // add left and right child into the random tree nodes[num_nodes].sample_idx = lcID; nodes[num_nodes].isSplit = false; nodes[num_nodes].pNodeID = n; nodes[num_nodes].depth = nodes[n].depth + 1; nodes[num_nodes].cNodesID[0] = 0; nodes[num_nodes].cNodesID[1] = 0; nodes[num_nodes].isLeaf = true; nodes[num_nodes + 1].sample_idx = rcID; nodes[num_nodes + 1].isSplit = false; nodes[num_nodes + 1].pNodeID = n; nodes[num_nodes + 1].depth = nodes[n].depth + 1; nodes[num_nodes + 1].cNodesID[0] = 0; nodes[num_nodes + 1].cNodesID[1] = 0; nodes[num_nodes + 1].isLeaf = true; num_split++; num_leafnodes++; num_nodes += 2; } } if (num_split == 0) { stop = 1; } else { numNodes = num_nodes; numLeafNodes = num_leafnodes; } } // mark leaf nodes. // clear sample indices in each node leafID.clear(); for (int i = 0; i < numNodes; i++) { nodes[i].sample_idx.clear(); if (nodes[i].isLeaf) { leafID.push_back(i); } } }
// *********************************************************************************** //
// Train 2 nodes based on their parent's children.
//
// Pre-order recursive construction of the tree below `curr`:
//  - a node with exactly 2 members is split and both children are visited;
//  - a node with more than 1 member whose CalcAvgIntPairwise() value is below
//    intSimThres is split, its two new leaves are trained on the parent's
//    progeny for up to CYCLE_MAX cycles (also bounded by MAX_T through
//    total_t), each leaf's progeny list is rebuilt from its alignment, and
//    both children are visited;
//  - any other node is left untouched.
//
// NOTE(review): `winner` is dereferenced without a NULL check; this relies on
// InorderFindWinner() always selecting a node.
// NOTE(review): lowestIntSim, lastNLNZ, lowSimNode, tmpName and pS are
// declared but never used in this function.
void SOTA::PreOrderBuildNodes(TreeNode* curr)
{
	int j;
	Child* c;

	//Start with a node... split it if there are more than one children in the current parent
	if(curr->members==2){
		SplitNode(curr);

		if(curr->left->members>0 && curr->right->members>0)
			numLeavesNonZero++;
		if(!treeTesting)
		{//	printf("\nLeaves: %d, NZLeaves: %.0lf, Split: %d, LeftAfterSplit: %did %dm, RightAfterSplit: %did %dm\n", numLeaves, numLeavesNonZero, curr->nodeID, curr->left->nodeID, curr->left->members, curr->right->nodeID, curr->right->members);
		//	curr->left->profile->PrintMotifConsensus();
		//	curr->right->profile->PrintMotifConsensus();
		}
		//Recursion
		PreOrderBuildNodes(curr->left);
		PreOrderBuildNodes(curr->right);
	}else if(curr->members>1 && CalcAvgIntPairwise(curr)<intSimThres){
		SplitNode(curr);
		//Parent now split... train the new leaves
		int z,b;
		double winningScore=0, winningPVal=0, lowestIntSim, lastNLNZ=0;
		TreeNode* winner=NULL;
		TreeNode* lowSimNode=NULL;
		char tmpName[STR_LEN];
		int mi1, mi2;
		double pS;
		bool mforward1, mforward2;

		t=0;
		do{
			//Reset the nodes
			InorderReset(curr);

			//Find & update winners
			for(c=curr->progeny; c!=NULL; c=c->next)
			{
				// Very low starting scores so any candidate reported by the
				// search can replace them.
				winningScore=-100000;
				winningPVal=-100000;
				winner=NULL;
				InorderFindWinner(curr, motifSet[c->mID], winner, mi1, mi2, mforward1, mforward2, winningScore, winningPVal);

				//add the motif to the winning node
				if(winner->members==0){
					// First member: start a fresh single-motif alignment.
					if(winner->alignment!=NULL)
						delete winner->alignment;
					winner->alignment = new MultiAlignRec(1, motifSet[c->mID]->GetLen());
					strcpy(winner->alignment->alignedNames[0], motifSet[c->mID]->name);
					strcpy(winner->alignment->profileAlignment[0]->name, motifSet[c->mID]->name);
					winner->alignment->alignedIDs[0] = c->mID;
					winner->members=1;
					winner->avgPval=winningPVal;
					//initialise the alignment
					for(z=0; z<motifSet[c->mID]->GetLen(); z++)
						for(b=0; b<B; b++)
							winner->alignment->profileAlignment[0]->f[z][b]=motifSet[c->mID]->f[z][b];
				}else{
					//Add a motif to an existing alignment
					winner->alignment = MAman->SingleProfileAddition(winner->alignment, Plat->inputMotifs[c->mID], c->mID);
					winner->members++;
					// Running mean of the winning p-values over all members.
					winner->avgPval = (winner->avgPval*(((double)winner->members-1)/(double)winner->members))+(winningPVal*(1/(double)winner->members));
				}
			}

			//Adjust the models (Neighbourhood update)
			InorderAdjustModels(curr);
			t++;
			total_t++;
		}while(t<CYCLE_MAX && total_t<MAX_T);

		//Add the children to each leaf
		// Each leaf's progeny list is built by prepending one Child per
		// aligned motif id.
		for(j=0; j<curr->left->members; j++){
			Child* tmp=new Child();
			tmp->next = curr->left->progeny;
			tmp->m=motifSet[curr->left->alignment->alignedIDs[j]];
			tmp->mID=curr->left->alignment->alignedIDs[j];
			curr->left->progeny = tmp;
		}for(j=0; j<curr->right->members; j++){
			Child* tmp=new Child();
			tmp->next = curr->right->progeny;
			tmp->m=motifSet[curr->right->alignment->alignedIDs[j]];
			tmp->mID=curr->right->alignment->alignedIDs[j];
			curr->right->progeny = tmp;
		}

		if(curr->left->members>0 && curr->right->members>0)
			numLeavesNonZero++;
		if(!treeTesting)
		{//	printf("\nLeaves: %d, NZLeaves: %.0lf, Split: %d, LeftAfterSplit: %did %dm, RightAfterSplit: %did %dm\n", numLeaves, numLeavesNonZero, curr->nodeID, curr->left->nodeID, curr->left->members, curr->right->nodeID, curr->right->members);
		//	curr->left->profile->PrintMotifConsensus();
		//	curr->right->profile->PrintMotifConsensus();
		}
		//Recursion
		PreOrderBuildNodes(curr->left);
		PreOrderBuildNodes(curr->right);
	}
}