示例#1
0
文件: tree.c 项目: esiqveland/skole
/*
 * Walks the subtree rooted at `node` and drops every node for which
 * is_prunable() is true, reporting through `simpler` which node should take
 * the place of `node` in its parent.
 *
 * node:    root of the subtree to process; may be NULL.
 * simpler: out-parameter receiving the replacement for `node`:
 *            - NULL when `node` is NULL;
 *            - `node` itself when it is kept;
 *            - whatever its last processed child reported when `node` is
 *              pruned (left untouched if a pruned node has no children).
 */
void prune_node(node_t* node, node_t** simpler) {
    // An absent subtree is replaced by an absent subtree.
    if (node == NULL) {
        *simpler = NULL;
        return;
    }

    if (!is_prunable(node)) {
        // This node stays: process each child and store whatever comes back
        // in the child slot, in case the child itself was pruned away.
        int idx = 0;
        while (idx < node->n_children) {
            prune_node(node->children[idx], simpler);
            node->children[idx] = *simpler;
            idx++;
        }
        // Tell the caller to keep pointing at this node.
        *simpler = node;
        return;
    }

    // This node goes away: let the children report the replacement through
    // `simpler` (each call overwrites the previous result), then clean up
    // the node being removed.
    for (int idx = 0; idx < node->n_children; idx++) {
        prune_node(node->children[idx], simpler);
    }
    node_finalize(node);
}
示例#2
0
文件: tree.c 项目: esiqveland/skole
/*
 * Simplifies the syntax tree rooted at `root`.
 *
 * simplified: out-parameter receiving the root of the simplified tree. This
 *             is the node prune_node() reports as the replacement for
 *             `root`, which is `root` itself whenever the root is kept.
 * root:       root of the tree to simplify.
 *
 * The tree is first pruned with prune_node(), then dfs() is run over the
 * tree that remains.
 */
void
simplify_tree ( node_t **simplified, node_t *root )
{
    // Default result, for the case where prune_node() does not write to the
    // out-parameter.
    *simplified = root;

    // prune all redundant nodes first; prune_node() stores the node that
    // should now act as the root in *simplified
    prune_node(root, simplified);

    // Keep the replacement reported by prune_node() instead of resetting it
    // to `root`: if the root itself was pruned it has been passed to
    // node_finalize() and must not be handed out or traversed again.
    node_t *pruned_root = *simplified;

    // go through the whole tree, doing our work on each node
    dfs(pruned_root, pruned_root);
}
示例#3
0
// Public entry point for pruning a decision tree against a validation set.
// Starts prune_node at the tree's root and returns its result unchanged.
int dt_prune(decision_tree *dt, data_set *validation_data) {
    int pruned_count = prune_node(dt, dt->root, validation_data);
    return pruned_count;
}
示例#4
0
// Helper for prune_node, defined below it: tries to drop the subtree stored
// in *slot and returns how many nodes were pruned.
static int try_prune_child(decision_tree *dt, dt_node **slot,
                           data_set *validation_data, float baseline);

// this is a private function that recursively prunes nodes top-down
// and only accepts a pruning if it does not lower the prediction score of
// the validation data (a tie is accepted, which favours the smaller tree)
//
// dt:              tree being pruned; scored as a whole via dt_score
// node:            node whose subtrees are candidates; NULL is a no-op
// validation_data: data set used to score every candidate prune
//
// returns the number of nodes successfully pruned
int prune_node(decision_tree *dt, dt_node *node, data_set *validation_data) {
    // nothing to prune below a missing node (e.g. an empty tree's root)
    if(node == NULL) {
        return 0;
    }

    // the score with both subtrees still attached
    float baseline = dt_score(dt, validation_data);
    int left_prune_count =
        try_prune_child(dt, &node->left, validation_data, baseline);

    // If anything was pruned on the left, the tree (and so its score) has
    // changed. Re-score before judging the right subtree, otherwise a right
    // prune could be accepted even though it undoes the gain just made.
    if(left_prune_count > 0 && node->right != NULL) {
        baseline = dt_score(dt, validation_data);
    }
    int right_prune_count =
        try_prune_child(dt, &node->right, validation_data, baseline);

    // need to see if we're a leaf now
    if(node->left == NULL && node->right == NULL) {
        node->prediction_value = guess_node_class(dt, node);
        node->is_leaf = 1;
    }

    return left_prune_count + right_prune_count;
}

// Tries to drop the subtree stored in *slot.
//
// The subtree is detached and the whole tree re-scored. If the score is at
// least `baseline` the subtree is freed and *slot stays NULL; otherwise the
// subtree is re-attached and pruning continues inside it.
//
// returns the number of nodes pruned (0 when *slot is already NULL)
static int try_prune_child(decision_tree *dt, dt_node **slot,
                           data_set *validation_data, float baseline) {
    // only report prunes that are noticeable in score or in size
    const double min_reported_gain = 0.0002;
    const int min_reported_nodes = 10;

    // save the subtree so that we can restore it if the classification
    // score gets worse without it
    dt_node *subtree = *slot;
    if(subtree == NULL) {
        return 0;
    }

    // score the decision tree with the subtree missing
    *slot = NULL;
    float pruned_score = dt_score(dt, validation_data);

    if(pruned_score >= baseline) {
        // found a good prune!
        int dropped = count_nodes(subtree);
        float diff = pruned_score - baseline;
        if(diff > min_reported_gain || dropped > min_reported_nodes) {
            printf("Improved score by %.4f, dropped %d nodes\n",
                    diff, dropped);
        }
        // throw away the subtree now that we don't need it
        dt_free_node(subtree);
        return dropped;
    }

    // prune was no good, so restore the subtree and recurse
    *slot = subtree;
    return prune_node(dt, subtree, validation_data);
}