예제 #1
0
/// Converting constructor: fills this collection with the elements of \p e,
/// an Examples container of a (possibly different) example type EXAMPLE2.
///
/// Entry and exit are traced through Debug::log, together with the process
/// resource usage reported by stats::resource_usage().
///
/// \param e Source collection; it is only read.
Examples<EXAMPLE>::Examples(const Examples<EXAMPLE2>& e) {
	// Entry trace: which instantiation is running and how many elements arrive.
	Debug::log(2) << "Examples<" << EXAMPLE::name()
	              << ">::Examples(Examples<" << EXAMPLE2::name()
	              << "> [" << e.size() << "])...\n";
	Debug::log(3) << stats::resource_usage() << "\n";

	// Reserve room for e.size() elements up front, then insert the whole
	// source range in a single call.
	this->reserve(e.size());
	this->insert(this->begin(), e.begin(), e.end());

	// Sanity check: the element count must match the source afterwards.
	assert(e.size() == this->size());

	// Exit trace, mirroring the entry message.
	Debug::log(2) << "...Examples<" << EXAMPLE::name()
	              << ">::Examples(Examples<" << EXAMPLE2::name()
	              << "> [" << e.size() << "])\n";
	Debug::log(2) << stats::resource_usage() << "\n";
}
예제 #2
0
int main(int argc, char** argv) {
	string filename = argv[1], _eps = argv[2];
	double eps = stod(_eps);
	ifstream fin(filename);
	string line;
	Examples e;
	Appear app;
	while (getline(fin, line)) {
		istringstream sin(line);
		int ans;
		sin >> ans;
		e.PB(Example(ans));
		
		int key;
		char c;
		Feature val;
		while (sin >> key >> c >> val) {
			e[e.size() - 1].feat[key] = val;
			app.insert(key);
		}
	}
	
	exPtrs p;
	for (int i = 0; i < e.size(); i++)
		p.PB(&e[i]);

	Tree *root = decide(p, eps, app);
	
	cout << "int tree_predict(double *attr) {" << endl;
	print(root, 1);
	cout << "}" << endl;
	
	delete root;
	
	return 0;
}
예제 #3
0
/// Weight the leaves of this tree, and update the Example weights.
///
/// Every example in \p exmpls is routed to its leaf with find_leaf(). Each
/// leaf then has its confidence set from the examples that reached it
/// (set_confidence_and_initial_weights), and that confidence is added back
/// onto those same examples (add_confidence). Per-leaf statistics are written
/// to the debug log along the way.
///
/// \param exmpls Examples to route through the tree; pointers to them are
///               collected per leaf and updated through add_confidence().
/// \pre Every leaf of the tree is reached by at least one example, and the
///      per-leaf example counts add up to exmpls.size() (both are asserted).
/// \todo Backprune splits that don't reduce loss, and backprune leaves
/// that don't have enough weight/exmpls to meet the initial splitting
/// criteria
/// \todo Weight the internal nodes too, for debugging purposes?
template<typename EXAMPLE> void Tree::weight_leaves_and_update_examples(Examples<EXAMPLE>& exmpls) {
	Debug::log(1) << "\nTree::weight_leaves_and_update_examples(Examples<" << EXAMPLE::name() << ">)...\n";

	typedef hash_map<NodeID, set<ID<Sentence> > > SentencesByNode;
	typedef hash_map<NodeID, ExamplePtrs<EXAMPLE> > ExamplesByLeaf;

	// sentences[id] holds the distinct sentences whose examples reach leaf id;
	// the NO_NODE entry accumulates the distinct sentences over all leaves.
	SentencesByNode sentences;
	// leaves[id] holds pointers to the examples that reach leaf id.
	ExamplesByLeaf leaves;

	unsigned totcnt = 0;
	for (typename Examples<EXAMPLE>::iterator ex = exmpls.begin(); ex != exmpls.end(); ++ex) {
		// Find the leaf that this example falls into.
		const Node* leaf = this->find_leaf(*ex);
		assert(leaf->is_leaf());
		assert(leaf->id() != NO_NODE);

		sentences[leaf->id()].insert(ex->sentence());
		sentences[NO_NODE].insert(ex->sentence());

		leaves[leaf->id()].push_back(&(*ex));

		// Progress reporting: coarse at log level 3, finer at level 4.
		totcnt++;
		if (totcnt % 100000 == 0)
			Debug::log(3) << "\tProcessed " << totcnt << " examples in Tree::weight_leaves()\n";
		if (totcnt % 10000 == 0)
			Debug::log(4) << "\tProcessed " << totcnt << " examples in Tree::weight_leaves()\n";
	}

	Debug::log(2) << "Done processing " << totcnt << " examples in Tree::weight_leaves()\n";

	// Compute the confidence for each leaf.
	unsigned leafcnt = 0;
	unsigned sentence_cnt = 0;
	unsigned example_cnt = 0;
	for (vector<Node>::iterator n = nodes.begin(); n != nodes.end(); ++n) {
		if (!n->is_leaf())
			continue;

		// Look each per-leaf record up once and reuse the iterators below.
		typename ExamplesByLeaf::iterator leaf_it = leaves.find(n->id());
		assert(leaf_it != leaves.end());
		SentencesByNode::iterator sentence_it = sentences.find(n->id());
		assert(sentence_it != sentences.end());

		// Read-only view for the queries; the single mutation below goes
		// through leaf_it->second directly.
		const ExamplePtrs<EXAMPLE>& leaf_examples = leaf_it->second;

		// Loss before this leaf's confidence is applied (logged below as the
		// "conf=0" loss).
		double orig_unpenalized_loss = leaf_examples.unpenalized_loss();

		example_cnt += leaf_examples.size();
		n->set_confidence_and_initial_weights(leaf_examples);

		// Add the leaf confidence to the leaf Examples.
		leaf_it->second.add_confidence(n->confidence());

		// Loss of this leaf's Examples after the confidence update.
		double unpenalized_loss = leaf_examples.unpenalized_loss();

		leafcnt++;
		Debug::log(2) << n->to_string();
		sentence_cnt += sentence_it->second.size();
		Debug::log(2) << "\t" << leaf_examples.size() << " examples from ";
		Debug::log(2) << sentence_it->second.size() << " different sentences\n";

		double penalty = n->penalty();
		Debug::log(2) << "\tloss = " << unpenalized_loss + penalty << " = " << unpenalized_loss << " (unpenalized loss) + " << penalty << " (penalty)";
		if (n->confidence() != 0) Debug::log(2) << "  (conf=0 loss was " << orig_unpenalized_loss << ")";
		Debug::log(2) << "\n";
	}
	// Every example must have been counted at exactly one leaf.
	assert(example_cnt == exmpls.size());
	Debug::log(2) << "Examples from " << sentences[NO_NODE].size() << " different sentences.\n";

	Debug::log(2) << "Done weighting " << leafcnt << " leaves in Tree::weight_leaves()\n";
	Debug::log(2) << stats::resource_usage() << "\n";

	// WRITEME: Backprune splits that don't reduce loss

	Debug::log(1) << "...Tree::weight_leaves_and_update_examples(Examples<" << EXAMPLE::name() << ">)\n";
}