/*
 * Restore the min-heap property by sinking the element at `index` down
 * until neither child has a smaller weight.
 */
void heapify_down_edge_heap(struct Edge_Heap *heap, int index) {
	if (index >= heap->curr_size)
		return;
	int left_index = left_child_index(index);
	int right_index = right_child_index(index);
	int min = index;
	/* Pick the smallest of the node and its two children (min-heap order). */
	if (left_index < heap->curr_size
			&& heap->A[left_index].weight < heap->A[min].weight) {
		min = left_index;
	}
	if (right_index < heap->curr_size
			&& heap->A[right_index].weight < heap->A[min].weight) {
		min = right_index;
	}
	if (min == index)
		return;
	swap_edge_heap_elements(heap, index, min);
	heapify_down_edge_heap(heap, min);
}
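The heapify routine above leans on a struct and three helpers that are not shown and would normally be declared before it. A minimal sketch of what they might look like follows; the A and curr_size fields and the helper names come from the snippet itself, while the Edge layout, the capacity field, and extract_min_edge are assumptions added only to make the sketch self-contained.

struct Edge {
	int src;
	int dst;
	int weight;
};

struct Edge_Heap {
	struct Edge *A;   /* backing array; element 0 is the root */
	int curr_size;    /* number of elements currently in the heap */
	int capacity;     /* allocated length of A */
};

/* Children of the node at position i in a 0-based array heap. */
static int left_child_index(int i)  { return 2 * i + 1; }
static int right_child_index(int i) { return 2 * i + 2; }

static void swap_edge_heap_elements(struct Edge_Heap *heap, int i, int j) {
	struct Edge tmp = heap->A[i];
	heap->A[i] = heap->A[j];
	heap->A[j] = tmp;
}

/* Typical use of heapify_down: pop the minimum-weight edge off the root.
 * Assumes heap->curr_size > 0. */
struct Edge extract_min_edge(struct Edge_Heap *heap) {
	struct Edge min_edge = heap->A[0];
	heap->A[0] = heap->A[heap->curr_size - 1];
	heap->curr_size--;
	heapify_down_edge_heap(heap, 0);
	return min_edge;
}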
Example #2
void RegressionNode<FeatT, LabT, SplitT, SplFitterT>::train(const RegressionTree<FeatT, LabT, SplitT, SplFitterT> & tree,
        const feature_mtx<FeatT> & features,
        const label_mtx<LabT> & labels,
        const data_indices_vec & data_indices,
        const TreeOptions & tree_opts,
        SplFitterT<FeatT, LabT> * fitter,
        const util::MultiDimGaussianX<LabT> * const _dist) {
// Store the indices which pass through this node (the assignment makes a copy).
training_data_indices = data_indices;
//LOG(INFO)
#ifdef VERBOSE
std::cout << "[t" << tree.tree_id << ":" << node_id << "] got " << num_training_datapoints()
          << " datapoints: [" << data_indices.transpose() << "]" << std::endl;
#endif
if (_dist == NULL) {
    //LOG(INFO)
#ifdef VERBOSE
    std::cout << "[t" << tree.tree_id << ":" << node_id << "] no dist provided, calculating..." << std::endl;
#endif
    dist.fit_params(labels, data_indices);
}
else {
    //LOG(INFO)
#ifdef VERBOSE
    std::cout << "[t" << tree.tree_id << ":" << node_id << "] using provided distribution" << std::endl;
#endif
    dist.mean = _dist->mean;
    dist.cov = _dist->cov;
}
//LOG(INFO)
#ifdef VERBOSE
std::cout << "[t" << tree.tree_id << ":" << node_id << "] dist = " << dist << std::endl;
#endif

// std::cout << "[t" << tree.tree_id << ":" << node_id << "] #0, 0: " << features.coeff(0, 0) << " @ " << &features.coeff(0, 0) << std::endl;

// Check whether to stop growing now. NB: even if this returns false, we might
// still stop growing if we cannot find a decent split (see below)
if (stopping_conditions_reached(tree_opts)) {
    return;
}

// Get the indices going left and right from the splitter object. We declare them
// on the stack here so that they are cleaned up automatically at the end of this
// call to train().
data_indices_vec right_child_indices;
data_indices_vec left_child_indices;

// bool good_split_found = true;
// std::cout << "[t" << tree.tree_id << ":" << node_id << "] choose_split_parameters" << std::endl;
bool good_split_found = fitter->choose_split_parameters(features, labels, data_indices, dist,
                        &split, &left_child_indices, &right_child_indices);

if (!good_split_found) {
    //LOG(ERROR)
#ifdef VERBOSE
    std::cout << "[t" << tree.tree_id << ":" << node_id << "] didn't find a good split, stopping" << std::endl;
#endif
    return;
}

is_leaf = false;

// If we are here then we found a decent split; its indices are stored in
// left_child_indices and right_child_indices. First create the child nodes, then
// do the training. FIXME: we could increase efficiency (slightly!) by passing the
// child statistics the fitter has already computed into the child train() calls
// instead of letting each child refit its own distribution.
left.reset(new RegressionNode<FeatT, LabT, SplitT, SplFitterT>(left_child_index(), this,
           labels.cols(), depth + 1));
right.reset(new RegressionNode<FeatT, LabT, SplitT, SplFitterT>(right_child_index(), this,
            labels.cols(), depth + 1));
left->train(tree, features, labels, left_child_indices, tree_opts, fitter);
right->train(tree, features, labels, right_child_indices, tree_opts, fitter);
}
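For a sense of what choose_split_parameters is expected to hand back, the sketch below shows the usual regression-tree mechanism: an axis-aligned threshold test on one feature column partitions the row indices reaching the node into left and right children. This is a hypothetical, self-contained illustration, not the SplFitterT API from the snippet; AxisSplit, partition_indices, and the plain std::vector types are stand-ins for the real split and matrix types.

#include <cstddef>
#include <vector>

// Hypothetical stand-in for the split a fitter chooses: one feature column and
// a threshold; rows whose value is <= threshold are sent to the left child.
struct AxisSplit {
	std::size_t feature;
	double threshold;
};

// Partition the row indices reaching a node into the two children, mirroring
// what choose_split_parameters() writes into left/right_child_indices.
void partition_indices(const std::vector<std::vector<double>> &features,
                       const std::vector<std::size_t> &node_indices,
                       const AxisSplit &split,
                       std::vector<std::size_t> *left_child_indices,
                       std::vector<std::size_t> *right_child_indices) {
	left_child_indices->clear();
	right_child_indices->clear();
	for (std::size_t row : node_indices) {
		if (features[row][split.feature] <= split.threshold)
			left_child_indices->push_back(row);
		else
			right_child_indices->push_back(row);
	}
}

In a regression forest the candidate split is then typically scored by how much it reduces the label variance (or covariance determinant) in the two children relative to the parent, which is presumably why train() fits dist before asking the fitter for a split.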