Examples<EXAMPLE>::Examples(const Examples<EXAMPLE2>& e) { Debug::log(2) << "Examples<" << EXAMPLE::name() << ">::Examples(Examples<" << EXAMPLE2::name() << "> [" << e.size() << "])...\n"; Debug::log(3) << stats::resource_usage() << "\n"; this->reserve(e.size()); this->insert(this->begin(), e.begin(), e.end()); assert(this->size() == e.size()); Debug::log(2) << "...Examples<" << EXAMPLE::name() << ">::Examples(Examples<" << EXAMPLE2::name() << "> [" << e.size() << "])\n"; Debug::log(2) << stats::resource_usage() << "\n"; }
int main(int argc, char** argv) { string filename = argv[1], _eps = argv[2]; double eps = stod(_eps); ifstream fin(filename); string line; Examples e; Appear app; while (getline(fin, line)) { istringstream sin(line); int ans; sin >> ans; e.PB(Example(ans)); int key; char c; Feature val; while (sin >> key >> c >> val) { e[e.size() - 1].feat[key] = val; app.insert(key); } } exPtrs p; for (int i = 0; i < e.size(); i++) p.PB(&e[i]); Tree *root = decide(p, eps, app); cout << "int tree_predict(double *attr) {" << endl; print(root, 1); cout << "}" << endl; delete root; return 0; }
/// Weight the leaves of this tree, and update the Example weights. /// \todo Backprune splits that don't reduce loss, and backprune leaves /// that don't have enough weight/exmpls to meet the initial splitting /// criteria /// \todo Weight the internal nodes too, for debugging purposes? template<typename EXAMPLE> void Tree::weight_leaves_and_update_examples(Examples<EXAMPLE>& exmpls) { Debug::log(1) << "\nTree::weight_leaves_and_update_examples(Examples<" << EXAMPLE::name() << ">)...\n"; vector<Node>::iterator n; Double orig_total_weight; hash_map<NodeID, set<ID<Sentence> > > sentences; hash_map<NodeID, ExamplePtrs<EXAMPLE> > leaves; unsigned totcnt = 0; for(typename Examples<EXAMPLE>::iterator ex = exmpls.begin(); ex != exmpls.end(); ex++) { // Find the node that e falls into. const Node* n = this->find_leaf(*ex); assert(n->is_leaf()); assert(n->id() != NO_NODE); sentences[n->id()].insert(ex->sentence()); sentences[NO_NODE].insert(ex->sentence()); leaves[n->id()].push_back(&(*ex)); // FIXME: This won't work if there's noise orig_total_weight += ex->weight(); totcnt++; if (totcnt % 100000 == 0) Debug::log(3) << "\tProcessed " << totcnt << " examples in Tree::weight_leaves()\n"; if (totcnt % 10000 == 0) Debug::log(4) << "\tProcessed " << totcnt << " examples in Tree::weight_leaves()\n"; } Debug::log(2) << "Done processing " << totcnt << " examples in Tree::weight_leaves()\n"; // Compute the confidence for each leaf. unsigned leafcnt = 0; unsigned sentence_cnt = 0; unsigned example_cnt = 0; for (n = nodes.begin(); n != nodes.end(); n++) { if (n->is_leaf()) { assert(leaves.find(n->id()) != leaves.end()); const ExamplePtrs<EXAMPLE>& leaf_examples = leaves.find(n->id())->second; double orig_unpenalized_loss = leaf_examples.unpenalized_loss(); Weights initial_weight = leaf_examples.initial_weight(); example_cnt += leaf_examples.size(); n->set_confidence_and_initial_weights(leaf_examples); // Add the leaf confidence to the leaf Examples. leaves.find(n->id())->second.add_confidence(n->confidence()); // Update the confidence of this leaf's Example%s. double unpenalized_loss = leaf_examples.unpenalized_loss(); leafcnt++; // Debug::log(2) << "Weighted leaf:\n"; // Debug::log(2) << n->to_string("\t"); Debug::log(2) << n->to_string(); assert(sentences.find(n->id()) != sentences.end()); sentence_cnt += sentences.find(n->id())->second.size(); Debug::log(2) << "\t" << leaf_examples.size() << " examples from "; Debug::log(2) << sentences.find(n->id())->second.size() << " different sentences\n"; double penalty = n->penalty(); Debug::log(2) << "\tloss = " << unpenalized_loss + penalty << " = " << unpenalized_loss << " (unpenalized loss) + " << penalty << " (penalty)"; if (n->confidence() != 0) Debug::log(2) << " (conf=0 loss was " << orig_unpenalized_loss << ")"; Debug::log(2) << "\n"; } } assert(example_cnt == exmpls.size()); Debug::log(2) << "Examples from " << sentences[NO_NODE].size() << " different sentences.\n"; Debug::log(2) << "Done weighting " << leafcnt << " leaves in Tree::weight_leaves()\n"; Debug::log(2) << stats::resource_usage() << "\n"; // WRITEME: Backprune splits that don't reduce loss Debug::log(1) << "...Tree::weight_leaves_and_update_examples(Examples<" << EXAMPLE::name() << ">)\n"; }