/* Sift the element at `index` down until the min-heap property holds again. */
void heapify_down_edge_heap(struct Edge_Heap *heap, int index) {
    if (index >= heap->curr_size) return;
    int left_index = left_child_index(index);
    int right_index = right_child_index(index);
    int min = index;
    /* Pick the smallest of the element and its in-bounds children. */
    if (left_index < heap->curr_size && heap->A[left_index].weight < heap->A[min].weight) {
        min = left_index;
    }
    if (right_index < heap->curr_size && heap->A[right_index].weight < heap->A[min].weight) {
        min = right_index;
    }
    if (min == index) return;
    swap_edge_heap_elements(heap, index, min);
    heapify_down_edge_heap(heap, min);
}
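/*
 * For reference, a minimal pop-min built on top of heapify_down_edge_heap().
 * This is a sketch under assumptions: judging from the usage above, Edge_Heap
 * stores struct Edge elements in A[0..curr_size-1] with the root at index 0.
 * extract_min_edge_heap is not part of the original code, and the caller is
 * expected to check curr_size > 0 first.
 */
struct Edge extract_min_edge_heap(struct Edge_Heap *heap) {
    struct Edge min_edge = heap->A[0];          /* the root holds the minimum */
    heap->A[0] = heap->A[heap->curr_size - 1];  /* move the last element to the root */
    heap->curr_size--;
    heapify_down_edge_heap(heap, 0);            /* sift it down to restore heap order */
    return min_edge;
}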
void RegressionNode<FeatT, LabT, SplitT, SplFitterT>::train(const RegressionTree<FeatT, LabT, SplitT, SplFitterT> & tree,
                                                            const feature_mtx<FeatT> & features,
                                                            const label_mtx<LabT> & labels,
                                                            const data_indices_vec & data_indices,
                                                            const TreeOptions & tree_opts,
                                                            SplFitterT<FeatT, LabT> * fitter,
                                                            const util::MultiDimGaussianX<LabT> * const _dist) {
    // Store the indices which pass through this node. NB: this assignment takes a copy.
    training_data_indices = data_indices;

    //LOG(INFO)
#ifdef VERBOSE
    std::cout << "[t" << tree.tree_id << ":" << node_id << "] got " << num_training_datapoints()
              << " datapoints: [" << data_indices.transpose() << "]" << std::endl;
#endif

    // Fit the label distribution at this node, unless the caller has already computed it.
    if (_dist == NULL) {
        //LOG(INFO)
#ifdef VERBOSE
        std::cout << "[t" << tree.tree_id << ":" << node_id << "] no dist provided, calculating..." << std::endl;
#endif
        dist.fit_params(labels, data_indices);
    } else {
        //LOG(INFO)
#ifdef VERBOSE
        std::cout << "[t" << tree.tree_id << ":" << node_id << "] using provided distribution" << std::endl;
#endif
        dist.mean = _dist->mean;
        dist.cov = _dist->cov;
    }

    //LOG(INFO)
#ifdef VERBOSE
    std::cout << "[t" << tree.tree_id << ":" << node_id << "] dist = " << dist << std::endl;
#endif
    // std::cout << "[t" << tree.tree_id << ":" << node_id << "] #0, 0: " << features.coeff(0, 0) << " @ " << &features.coeff(0, 0) << std::endl;

    // Check whether to stop growing now. NB: even if this returns false, we might
    // still stop growing if we cannot find a decent split (see below).
    if (stopping_conditions_reached(tree_opts)) {
        return;
    }

    // Get the indices going left and right from the splitter object. We declare
    // them on the stack here so that they are cleaned up automatically at the
    // end of this call to train().
    data_indices_vec right_child_indices;
    data_indices_vec left_child_indices;

    // std::cout << "[t" << tree.tree_id << ":" << node_id << "] choose_split_parameters" << std::endl;
    bool good_split_found = fitter->choose_split_parameters(features, labels, data_indices, dist,
                                                            &split, &left_child_indices, &right_child_indices);

    if (!good_split_found) {
        //LOG(ERROR)
#ifdef VERBOSE
        std::cout << "[t" << tree.tree_id << ":" << node_id << "] didn't find a good split, stopping" << std::endl;
#endif
        return;
    }

    is_leaf = false;

    // If we are here then we found a decent split; the resulting indices are stored
    // in left_child_indices and right_child_indices. First create the child nodes,
    // then do the training. FIXME: we could increase efficiency (slightly!) but
    left.reset(new RegressionNode<FeatT, LabT, SplitT, SplFitterT>(left_child_index(), this, labels.cols(), depth + 1));
    right.reset(new RegressionNode<FeatT, LabT, SplitT, SplFitterT>(right_child_index(), this, labels.cols(), depth + 1));
    left->train(tree, features, labels, left_child_indices, tree_opts, fitter);
    right->train(tree, features, labels, right_child_indices, tree_opts, fitter);
}
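// For context, a minimal sketch of how this recursion might be kicked off from
// the tree side. The tree-side pieces here (RegressionTree::train, its root_node
// member, and the chosen template-parameter kinds) are assumptions for
// illustration only; nothing except RegressionNode::train above is taken from
// this file. The _dist argument is left at its NULL default, exactly as the
// recursive calls above do for the children, so the root fits its own Gaussian.
template <typename FeatT, typename LabT, typename SplitT, template <typename, typename> class SplFitterT>
void RegressionTree<FeatT, LabT, SplitT, SplFitterT>::train(const feature_mtx<FeatT> & features,
                                                            const label_mtx<LabT> & labels,
                                                            const TreeOptions & tree_opts,
                                                            SplFitterT<FeatT, LabT> * fitter) {
    // Every datapoint reaches the root, so build the identity index vector.
    data_indices_vec all_indices(features.rows());
    for (int i = 0; i < features.rows(); i++) {
        all_indices(i) = i;
    }
    root_node->train(*this, features, labels, all_indices, tree_opts, fitter);
}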